diff options
479 files changed, 22096 insertions, 6407 deletions
diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp index 0b2e63b..874419f 100644 --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -500,7 +500,7 @@ bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const { const FDE &CurFDE = *I->second; std::optional<uint64_t> LSDA = CurFDE.getLSDAAddress(); - Function.setLSDAAddress(LSDA ? *LSDA : 0); + Function.setLSDAAddress(LSDA.value_or(0)); uint64_t Offset = Function.getFirstInstructionOffset(); uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor(); diff --git a/clang-tools-extra/clang-tidy/.clang-tidy b/clang-tools-extra/clang-tidy/.clang-tidy index 2443c97..0a2ea24 100644 --- a/clang-tools-extra/clang-tidy/.clang-tidy +++ b/clang-tools-extra/clang-tidy/.clang-tidy @@ -22,7 +22,6 @@ Checks: > -performance-unnecessary-value-param, readability-*, -readability-avoid-nested-conditional-operator, - -readability-avoid-return-with-void-value, -readability-braces-around-statements, -readability-container-contains, -readability-convert-member-functions-to-static, diff --git a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp index 8b2ca69..5378223 100644 --- a/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/NarrowingConversionsCheck.cpp @@ -381,10 +381,11 @@ void NarrowingConversionsCheck::diagNarrowTypeOrConstant( const Expr &Rhs) { APValue Constant = getConstantExprValue(Context, Rhs); if (Constant.isInt()) - return diagNarrowIntegerConstant(SourceLoc, Lhs, Rhs, Constant.getInt()); - if (Constant.isFloat()) - return diagNarrowConstant(SourceLoc, Lhs, Rhs); - return diagNarrowType(SourceLoc, Lhs, Rhs); + diagNarrowIntegerConstant(SourceLoc, Lhs, Rhs, Constant.getInt()); + else if (Constant.isFloat()) + diagNarrowConstant(SourceLoc, Lhs, Rhs); + else + diagNarrowType(SourceLoc, Lhs, Rhs); } void 
NarrowingConversionsCheck::handleIntegralCast(const ASTContext &Context, @@ -460,10 +461,10 @@ void NarrowingConversionsCheck::handleFloatingToIntegral( llvm::APFloat FloatConstant(0.0); if (getFloatingConstantExprValue(Context, Rhs, FloatConstant)) { if (!isFloatExactlyRepresentable(Context, FloatConstant, Lhs.getType())) - return diagNarrowConstant(SourceLoc, Lhs, Rhs); + diagNarrowConstant(SourceLoc, Lhs, Rhs); - if (PedanticMode) - return diagConstantCast(SourceLoc, Lhs, Rhs); + else if (PedanticMode) + diagConstantCast(SourceLoc, Lhs, Rhs); return; } @@ -478,7 +479,7 @@ void NarrowingConversionsCheck::handleFloatingToIntegral( void NarrowingConversionsCheck::handleFloatingToBoolean( const ASTContext &Context, SourceLocation SourceLoc, const Expr &Lhs, const Expr &Rhs) { - return diagNarrowTypeOrConstant(Context, SourceLoc, Lhs, Rhs); + diagNarrowTypeOrConstant(Context, SourceLoc, Lhs, Rhs); } void NarrowingConversionsCheck::handleBooleanToSignedIntegral( @@ -532,19 +533,20 @@ void NarrowingConversionsCheck::handleBinaryOperator(const ASTContext &Context, if (LhsType == RhsType) return; if (RhsType->getKind() == BuiltinType::Bool && LhsType->isSignedInteger()) - return handleBooleanToSignedIntegral(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isInteger() && LhsType->getKind() == BuiltinType::Bool) - return handleIntegralToBoolean(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isInteger() && LhsType->isFloatingPoint()) - return handleIntegralToFloating(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isInteger() && LhsType->isInteger()) - return handleIntegralCast(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isFloatingPoint() && LhsType->getKind() == BuiltinType::Bool) - return handleFloatingToBoolean(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isFloatingPoint() && LhsType->isInteger()) - return handleFloatingToIntegral(Context, SourceLoc, Lhs, Rhs); - if (RhsType->isFloatingPoint() && LhsType->isFloatingPoint()) - return handleFloatingCast(Context, SourceLoc, 
Lhs, Rhs); + handleBooleanToSignedIntegral(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isInteger() && LhsType->getKind() == BuiltinType::Bool) + handleIntegralToBoolean(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isInteger() && LhsType->isFloatingPoint()) + handleIntegralToFloating(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isInteger() && LhsType->isInteger()) + handleIntegralCast(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isFloatingPoint() && + LhsType->getKind() == BuiltinType::Bool) + handleFloatingToBoolean(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isFloatingPoint() && LhsType->isInteger()) + handleFloatingToIntegral(Context, SourceLoc, Lhs, Rhs); + else if (RhsType->isFloatingPoint() && LhsType->isFloatingPoint()) + handleFloatingCast(Context, SourceLoc, Lhs, Rhs); } bool NarrowingConversionsCheck::handleConditionalOperator( @@ -577,21 +579,28 @@ void NarrowingConversionsCheck::handleImplicitCast( SourceLocation SourceLoc = Lhs.getExprLoc(); switch (Cast.getCastKind()) { case CK_BooleanToSignedIntegral: - return handleBooleanToSignedIntegral(Context, SourceLoc, Lhs, Rhs); + handleBooleanToSignedIntegral(Context, SourceLoc, Lhs, Rhs); + return; case CK_IntegralToBoolean: - return handleIntegralToBoolean(Context, SourceLoc, Lhs, Rhs); + handleIntegralToBoolean(Context, SourceLoc, Lhs, Rhs); + return; case CK_IntegralToFloating: - return handleIntegralToFloating(Context, SourceLoc, Lhs, Rhs); + handleIntegralToFloating(Context, SourceLoc, Lhs, Rhs); + return; case CK_IntegralCast: - return handleIntegralCast(Context, SourceLoc, Lhs, Rhs); + handleIntegralCast(Context, SourceLoc, Lhs, Rhs); + return; case CK_FloatingToBoolean: - return handleFloatingToBoolean(Context, SourceLoc, Lhs, Rhs); + handleFloatingToBoolean(Context, SourceLoc, Lhs, Rhs); + return; case CK_FloatingToIntegral: - return handleFloatingToIntegral(Context, SourceLoc, Lhs, Rhs); + handleFloatingToIntegral(Context, SourceLoc, Lhs, Rhs); + return; case 
CK_FloatingCast: - return handleFloatingCast(Context, SourceLoc, Lhs, Rhs); + handleFloatingCast(Context, SourceLoc, Lhs, Rhs); + return; default: - break; + return; } } @@ -610,9 +619,10 @@ void NarrowingConversionsCheck::handleBinaryOperator(const ASTContext &Context, void NarrowingConversionsCheck::check(const MatchFinder::MatchResult &Result) { if (const auto *Op = Result.Nodes.getNodeAs<BinaryOperator>("binary_op")) - return handleBinaryOperator(*Result.Context, *Op); - if (const auto *Cast = Result.Nodes.getNodeAs<ImplicitCastExpr>("cast")) - return handleImplicitCast(*Result.Context, *Cast); - llvm_unreachable("must be binary operator or cast expression"); + handleBinaryOperator(*Result.Context, *Op); + else if (const auto *Cast = Result.Nodes.getNodeAs<ImplicitCastExpr>("cast")) + handleImplicitCast(*Result.Context, *Cast); + else + llvm_unreachable("must be binary operator or cast expression"); } } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp index 20c7329..5f19706 100644 --- a/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/ImplicitBoolConversionCheck.cpp @@ -361,14 +361,15 @@ void ImplicitBoolConversionCheck::check( if (const auto *CastToBool = Result.Nodes.getNodeAs<ImplicitCastExpr>("implicitCastToBool")) { const auto *Parent = Result.Nodes.getNodeAs<Stmt>("parentStmt"); - return handleCastToBool(CastToBool, Parent, *Result.Context); + handleCastToBool(CastToBool, Parent, *Result.Context); + return; } if (const auto *CastFromBool = Result.Nodes.getNodeAs<ImplicitCastExpr>("implicitCastFromBool")) { const auto *NextImplicitCast = Result.Nodes.getNodeAs<ImplicitCastExpr>("furtherImplicitCast"); - return handleCastFromBool(CastFromBool, NextImplicitCast, *Result.Context); + handleCastFromBool(CastFromBool, NextImplicitCast, 
*Result.Context); } } diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index f33950b..756db85 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -139,7 +139,7 @@ wording a diagnostic. you mean %1?``. * Appropriately capitalize proper nouns like ``Clang``, ``OpenCL``, ``GCC``, - ``Objective-C``, etc and language standard versions like ``C11`` or ``C++11``. + ``Objective-C``, etc. and language standard versions like ``C11`` or ``C++11``. * The wording should be succinct. If necessary, use a semicolon to combine sentence fragments instead of using complete sentences. e.g., prefer wording like ``'%0' is deprecated; it will be removed in a future release of Clang`` @@ -886,7 +886,7 @@ a string that the tablegen backend uses as a prefix to the LANG_OPTION_WITH_MARSHALLING([...], LangOpts->IgnoreExceptions, [...]) #endif // LANG_OPTION_WITH_MARSHALLING -Such definition can be used used in the function for parsing and generating +Such definition can be used in the function for parsing and generating command line: .. code-block:: c++ @@ -1745,7 +1745,7 @@ will be found by the lookup, since it effectively replaces the first declaration of "``f``". (Note that because ``f`` can be redeclared at block scope, or in a friend -declaration, etc. it is possible that the declaration of ``f`` found by name +declaration, etc., it is possible that the declaration of ``f`` found by name lookup will not be the most recent one.) In the semantics-centric view, overloading of functions is represented @@ -1945,7 +1945,7 @@ range of iterators over declarations of "``f``". function ``DeclContext::getPrimaryContext`` retrieves the "primary" context for a given ``DeclContext`` instance, which is the ``DeclContext`` responsible for maintaining the lookup table used for the semantics-centric view. 
Given a -DeclContext, one can obtain the set of declaration contexts that are +``DeclContext``, one can obtain the set of declaration contexts that are semantically connected to this declaration context, in source order, including this context (which will be the only result, for non-namespace contexts) via ``DeclContext::collectAllContexts``. Note that these functions are used @@ -1985,7 +1985,7 @@ broken code in the AST: errors, the Decl node is marked as invalid. - dropping invalid node: this often happens for errors that we don’t have graceful recovery. Prior to Recovery AST, a mismatched-argument function call - expression was dropped though a CallExpr was created for semantic analysis. + expression was dropped though a ``CallExpr`` was created for semantic analysis. With these strategies, clang surfaces better diagnostics, and provides AST consumers a rich AST reflecting the written source code as much as possible even diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4a2edae..247d784 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -241,12 +241,16 @@ Static Analyzer --------------- - The Clang Static Analyzer now handles parenthesized initialization. (#GH148875) +- ``__datasizeof`` (C++) and ``_Countof`` (C) no longer cause a failed assertion + when given an operand of VLA type. (#GH151711) New features ^^^^^^^^^^^^ Crash and bug fixes ^^^^^^^^^^^^^^^^^^^ +- Fixed a crash in the static analyzer when the expression in an + ``[[assume(expr)]]`` attribute was enclosed in parentheses. (#GH151529) Improvements ^^^^^^^^^^^^ diff --git a/clang/docs/ThinLTO.rst b/clang/docs/ThinLTO.rst index 569405f..8cb3e0b 100644 --- a/clang/docs/ThinLTO.rst +++ b/clang/docs/ThinLTO.rst @@ -249,6 +249,9 @@ during the traditional link step. The implementation is documented here: https://llvm.org/docs/DTLTO.html. +Command-Line Options +^^^^^^^^^^^^^^^^^^^^ + DTLTO requires the LLD linker (``-fuse-ld=lld``). 
``-fthinlto-distributor=<path>`` @@ -260,17 +263,29 @@ DTLTO requires the LLD linker (``-fuse-ld=lld``). - Can be specified multiple times to pass multiple options. - Multiple options can also be specified by separating them with commas. -Examples: - - ``clang -flto=thin -fthinlto-distributor=incredibuild.exe -Xthinlto-distributor=--verbose,--j10 -fuse-ld=lld`` - - ``clang -flto=thin -fthinlto-distributor=$(which python) -Xthinlto-distributor=incredibuild.py -fuse-ld=lld`` - If ``-fthinlto-distributor=`` is specified, Clang supplies the path to a compiler to be executed remotely to perform the ThinLTO backend compilations. Currently, this is Clang itself. +Usage +^^^^^ + +Compilation is unchanged from ThinLTO. DTLTO options need to be supplied for the link step: + +.. code-block:: console + + % clang -flto=thin -fthinlto-distributor=distribute.sh -Xthinlto-distributor=--verbose,--j10 -fuse-ld=lld file1.o file2.o + % clang -flto=thin -fthinlto-distributor=$(which python) -Xthinlto-distributor=distribute.py -fuse-ld=lld file1.o file2.o + +When using lld-link: + +.. code-block:: console + + % lld-link /out:a.exe file1.obj file2.obj /thinlto-distributor:distribute.exe /thinlto-remote-compiler:${LLVM}\bin\clang.exe /thinlto-distributor-arg:--verbose + Note that currently, DTLTO is only supported in some LLD flavors. Support can be added to other LLD flavours in the future. -See `DTLTO <https://lld.llvm.org/dtlto.html>`_ for more information. +See `DTLTO <https://lld.llvm.org/DTLTO.html>`_ for more information. More Information ================ diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 6c124aa..237b3b2 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -553,17 +553,13 @@ public: bool IgnoreTemplateOrMacroSubstitution = false) const; /// isIntegerConstantExpr - Return the value if this expression is a valid - /// integer constant expression. If not a valid i-c-e, return std::nullopt - /// and fill in Loc (if specified) with the location of the invalid - /// expression. + /// integer constant expression. 
If not a valid i-c-e, return std::nullopt - /// and fill in Loc (if specified) with the location of the invalid - /// expression. + /// integer constant expression. If not a valid i-c-e, return std::nullopt. /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. std::optional<llvm::APSInt> - getIntegerConstantExpr(const ASTContext &Ctx, - SourceLocation *Loc = nullptr) const; - bool isIntegerConstantExpr(const ASTContext &Ctx, - SourceLocation *Loc = nullptr) const; + getIntegerConstantExpr(const ASTContext &Ctx) const; + bool isIntegerConstantExpr(const ASTContext &Ctx) const; /// isCXX98IntegralConstantExpr - Return true if this expression is an /// integral constant expression in C++98. Can only be used in C++. @@ -574,8 +570,8 @@ public: /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. - bool isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result = nullptr, - SourceLocation *Loc = nullptr) const; + bool isCXX11ConstantExpr(const ASTContext &Ctx, + APValue *Result = nullptr) const; /// isPotentialConstantExpr - Return true if this function's definition /// might be usable in a constant expression in C++11, if it were marked diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index bb3953e..e117e99 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -716,11 +716,26 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_bf16_fp4, "V8yUiUiIUi", "nc", "gfx TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp8, "V8fV2UiUiIUi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_bf8, "V8fV2UiUiIUi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_scale_pk8_f32_fp4, "V8fUiUiIUi", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_bf16, "V2UiV8yf", "nc", "gfx1250-insts") 
+TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_bf16, "V2UiV8yf", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_f16, "V2UiV8hf", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_f16, "V2UiV8hf", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp8_f32, "V2UiV8ff", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_bf8_f32, "V2UiV8ff", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_f32, "UiV8ff", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_f16, "UiV8hf", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_pk8_fp4_bf16, "UiV8yf", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_pk_fp8_f32_e5m3, "iffiIb", "nc", "fp8e5m3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_fp8_f32_e5m3, "ifiiIi", "nc", "fp8e5m3-insts") TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_i4_i8, "UsUi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_sat_pk4_u4_u8, "UsUi", "nc", "gfx1250-insts") +TARGET_BUILTIN(__builtin_amdgcn_permlane_bcast, "iiii", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_permlane_up, "iiii", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_permlane_down, "iiii", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_permlane_xor, "iiii", "nc", "gfx1250-insts,wavefrontsize32") +TARGET_BUILTIN(__builtin_amdgcn_permlane_idx_gen, "iii", "nc", "gfx1250-insts,wavefrontsize32") + // GFX1250 WMMA builtins TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x4_f32, "V8fIbV2fIbV2fIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") TARGET_BUILTIN(__builtin_amdgcn_wmma_f32_16x16x32_bf16, "V8fIbV16yIbV16yIsV8fIbIb", "nc", "gfx1250-insts,wavefrontsize32") diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b4b94b8..07786c6 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ 
b/clang/include/clang/Basic/arm_sve.td @@ -20,21 +20,21 @@ include "arm_sve_sme_incl.td" // Load one vector (scalar base) def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; // Load one vector (scalar base, VL displacement) def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, 
VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_ld1">; let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { // Load one vector (vector base) @@ -117,22 +117,22 @@ def SVLD1UW_GATHER_INDEX_S : MInst<"svld1uw_gather[_{2}base]_index_{d}", "dPul // First-faulting load one vector (scalar base) def SVLDFF1 : MInst<"svldff1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; -def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, 
"aarch64_sve_ldff1">; -def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; -def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; -def SVLDFF1UH : MInst<"svldff1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">; -def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; -def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; +def SVLDFF1SB : MInst<"svldff1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1UB : MInst<"svldff1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1SH : MInst<"svldff1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1UH : MInst<"svldff1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1SW : MInst<"svldff1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; +def SVLDFF1UW : MInst<"svldff1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; // First-faulting load one vector (scalar base, VL displacement) def SVLDFF1_VNUM : MInst<"svldff1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">; -def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; -def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; -def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; -def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">; -def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", 
[IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; -def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; -} // let SVETargetGuard = "sve", SMETargetGuard = InvalidMode +def SVLDFF1SB_VNUM : MInst<"svldff1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1UB_VNUM : MInst<"svldff1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldff1">; +def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldff1">; +def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldff1">; +def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldff1">; +} let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { // First-faulting load one vector (vector base) @@ -214,22 +214,22 @@ def SVLDFF1UW_GATHER_INDEX_S : MInst<"svldff1uw_gather[_{2}base]_index_{d}", "dP // Non-faulting load one vector (scalar base) def SVLDNF1 : MInst<"svldnf1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; -def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; -def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; -def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; -def SVLDNF1UH : MInst<"svldnf1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; -def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; -def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, 
"aarch64_sve_ldnf1">; +def SVLDNF1SB : MInst<"svldnf1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1UB : MInst<"svldnf1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1SH : MInst<"svldnf1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1UH : MInst<"svldnf1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1SW : MInst<"svldnf1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; +def SVLDNF1UW : MInst<"svldnf1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; // Non-faulting load one vector (scalar base, VL displacement) def SVLDNF1_VNUM : MInst<"svldnf1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">; -def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; -def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; -def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ldnf1">; -def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; -def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; -def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; -} // let SVETargetGuard = "sve", SMETargetGuard = InvalidMode +def SVLDNF1SB_VNUM : MInst<"svldnf1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1UB_VNUM : MInst<"svldnf1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ldnf1">; +def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], 
MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ldnf1">; +def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ldnf1">; +def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ldnf1">; +} // Load one vector, unextended load, non-temporal (scalar base) def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdbm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">; @@ -240,19 +240,15 @@ def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdbm", [IsL // Load one quadword and replicate (scalar base) def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld1rq", [VerifyRuntimeMode]>; -multiclass StructLoad<string name, string proto, string i, list<FlagType> f = []> { - def : SInst<name, proto, "csilUcUsUiUlhfdbm", MergeNone, i, !listconcat(f, [IsStructLoad])>; -} - // Load N-element structure into N vectors (scalar base) -defm SVLD2 : StructLoad<"svld2[_{2}]", "2Pc", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>; -defm SVLD3 : StructLoad<"svld3[_{2}]", "3Pc", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>; -defm SVLD4 : StructLoad<"svld4[_{2}]", "4Pc", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>; +def SVLD2 : SInst<"svld2[_{2}]", "2Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld2_sret", [IsStructLoad, VerifyRuntimeMode]>; +def SVLD3 : SInst<"svld3[_{2}]", "3Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld3_sret", [IsStructLoad, VerifyRuntimeMode]>; +def SVLD4 : SInst<"svld4[_{2}]", "4Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld4_sret", [IsStructLoad, VerifyRuntimeMode]>; // Load N-element structure into N vectors (scalar base, VL displacement) -defm SVLD2_VNUM : StructLoad<"svld2_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2_sret", [VerifyRuntimeMode]>; -defm SVLD3_VNUM : 
StructLoad<"svld3_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3_sret", [VerifyRuntimeMode]>; -defm SVLD4_VNUM : StructLoad<"svld4_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4_sret", [VerifyRuntimeMode]>; +def SVLD2_VNUM : SInst<"svld2_vnum[_{2}]", "2Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld2_sret", [IsStructLoad, VerifyRuntimeMode]>; +def SVLD3_VNUM : SInst<"svld3_vnum[_{2}]", "3Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld3_sret", [IsStructLoad, VerifyRuntimeMode]>; +def SVLD4_VNUM : SInst<"svld4_vnum[_{2}]", "4Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld4_sret", [IsStructLoad, VerifyRuntimeMode]>; // Load one octoword and replicate (scalar base) let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in { @@ -264,28 +260,28 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in { } let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in { - def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>; - def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>; - def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>; - def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>; - def SVBFMLALB_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>; - def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, 
"aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFMLALB_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, VerifyRuntimeMode]>; + def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, VerifyRuntimeMode]>; def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfdot_lane_v2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>; def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalb_lane_v2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddi", "b", MergeNone, "aarch64_sve_bfmlalt_lane_v2", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; -} // let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" +} let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Contiguous zero-extending load to quadword (single vector). 
- def SVLD1UWQ : MInst<"svld1uwq[_{d}]", "dPc", "iUif", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1uwq">; + def SVLD1UWQ : MInst<"svld1uwq[_{d}]", "dPc", "iUif", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1uwq">; def SVLD1UWQ_VNUM : MInst<"svld1uwq_vnum[_{d}]", "dPcl", "iUif", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1uwq">; - def SVLD1UDQ : MInst<"svld1udq[_{d}]", "dPc", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; + def SVLD1UDQ : MInst<"svld1udq[_{d}]", "dPc", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">; // Load one vector (vector base + scalar offset) def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; - def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; + def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">; // Load one vector (scalar base + vector offset) def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">; @@ -299,14 +295,14 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { // Load N-element structure into N vectors (scalar base) - defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret", [VerifyRuntimeMode]>; - defm SVLD3Q : StructLoad<"svld3q[_{2}]", "3Pc", "aarch64_sve_ld3q_sret", [VerifyRuntimeMode]>; - defm SVLD4Q : StructLoad<"svld4q[_{2}]", "4Pc", "aarch64_sve_ld4q_sret", 
[VerifyRuntimeMode]>; + def SVLD2Q : SInst<"svld2q[_{2}]", "2Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld2q_sret", [IsStructLoad, VerifyRuntimeMode]>; + def SVLD3Q : SInst<"svld3q[_{2}]", "3Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld3q_sret", [IsStructLoad, VerifyRuntimeMode]>; + def SVLD4Q : SInst<"svld4q[_{2}]", "4Pc", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld4q_sret", [IsStructLoad, VerifyRuntimeMode]>; // Load N-element structure into N vectors (scalar base, VL displacement) - defm SVLD2Q_VNUM : StructLoad<"svld2q_vnum[_{2}]", "2Pcl", "aarch64_sve_ld2q_sret", [VerifyRuntimeMode]>; - defm SVLD3Q_VNUM : StructLoad<"svld3q_vnum[_{2}]", "3Pcl", "aarch64_sve_ld3q_sret", [VerifyRuntimeMode]>; - defm SVLD4Q_VNUM : StructLoad<"svld4q_vnum[_{2}]", "4Pcl", "aarch64_sve_ld4q_sret", [VerifyRuntimeMode]>; + def SVLD2Q_VNUM : SInst<"svld2q_vnum[_{2}]", "2Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld2q_sret", [IsStructLoad, VerifyRuntimeMode]>; + def SVLD3Q_VNUM : SInst<"svld3q_vnum[_{2}]", "3Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld3q_sret", [IsStructLoad, VerifyRuntimeMode]>; + def SVLD4Q_VNUM : SInst<"svld4q_vnum[_{2}]", "4Pcl", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_ld4q_sret", [IsStructLoad, VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// @@ -314,21 +310,21 @@ let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { // Store one vector (scalar base) def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_U : 
MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, 
"aarch64_sve_st1">; +def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">; let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { // Store one vector (vector base) @@ -399,21 +395,17 @@ def SVST1H_SCATTER_32B_INDICES_UU : MInst<"svst1h_scatter_[{3}]index[_{d}]", "vP def SVST1_SCATTER_INDEX_S : MInst<"svst1_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">; def SVST1H_SCATTER_INDEX_S : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl", [IsScatterStore], MemEltTyInt16, "aarch64_sve_st1_scatter_scalar_offset">; def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">; -} // let SVETargetGuard = "sve" - -multiclass StructStore<string name, string proto, string i, list<FlagType> f = []> { - def : SInst<name, proto, "csilUcUsUiUlhfdbm", MergeNone, i, !listconcat(f, [IsStructStore])>; } // Store N vectors into N-element structure (scalar base) -defm SVST2 : StructStore<"svst2[_{d}]", "vPp2", "aarch64_sve_st2", [VerifyRuntimeMode]>; -defm SVST3 : StructStore<"svst3[_{d}]", "vPp3", "aarch64_sve_st3", [VerifyRuntimeMode]>; -defm SVST4 : StructStore<"svst4[_{d}]", 
"vPp4", "aarch64_sve_st4", [VerifyRuntimeMode]>; +def SVST2 : SInst<"svst2[_{d}]", "vPp2", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st2", [IsStructStore, VerifyRuntimeMode]>; +def SVST3 : SInst<"svst3[_{d}]", "vPp3", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st3", [IsStructStore, VerifyRuntimeMode]>; +def SVST4 : SInst<"svst4[_{d}]", "vPp4", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st4", [IsStructStore, VerifyRuntimeMode]>; // Store N vectors into N-element structure (scalar base, VL displacement) -defm SVST2_VNUM : StructStore<"svst2_vnum[_{d}]", "vPpl2", "aarch64_sve_st2", [VerifyRuntimeMode]>; -defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3", [VerifyRuntimeMode]>; -defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4", [VerifyRuntimeMode]>; +def SVST2_VNUM : SInst<"svst2_vnum[_{d}]", "vPpl2", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st2", [IsStructStore, VerifyRuntimeMode]>; +def SVST3_VNUM : SInst<"svst3_vnum[_{d}]", "vPpl3", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st3", [IsStructStore, VerifyRuntimeMode]>; +def SVST4_VNUM : SInst<"svst4_vnum[_{d}]", "vPpl4", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st4", [IsStructStore, VerifyRuntimeMode]>; // Store one vector, with no truncation, non-temporal (scalar base) def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdbm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">; @@ -423,15 +415,15 @@ def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdbm", [Is let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { // Contiguous truncating store from quadword (single vector). 
- def SVST1UWQ : MInst<"svst1wq[_{d}]", "vPpd", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">; + def SVST1UWQ : MInst<"svst1wq[_{d}]", "vPpd", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">; def SVST1UWQ_VNUM : MInst<"svst1wq_vnum[_{d}]", "vPpld", "iUif", [IsStore], MemEltTyInt32, "aarch64_sve_st1wq">; - def SVST1UDQ : MInst<"svst1dq[_{d}]", "vPpd", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; + def SVST1UDQ : MInst<"svst1dq[_{d}]", "vPpd", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">; // Store one vector (vector base + scalar offset) def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; - def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; + def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">; // Store one vector (scalar base + vector offset) def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">; @@ -447,14 +439,14 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in { let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { // Store N vectors into N-element structure (scalar base) - defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q", [VerifyRuntimeMode]>; - defm SVST3Q : StructStore<"svst3q[_{d}]", "vPc3", "aarch64_sve_st3q", [VerifyRuntimeMode]>; - defm SVST4Q : StructStore<"svst4q[_{d}]", "vPc4", 
"aarch64_sve_st4q", [VerifyRuntimeMode]>; + def SVST2Q : SInst<"svst2q[_{d}]", "vPc2", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st2q", [IsStructStore, VerifyRuntimeMode]>; + def SVST3Q : SInst<"svst3q[_{d}]", "vPc3", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st3q", [IsStructStore, VerifyRuntimeMode]>; + def SVST4Q : SInst<"svst4q[_{d}]", "vPc4", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st4q", [IsStructStore, VerifyRuntimeMode]>; // Store N vectors into N-element structure (scalar base, VL displacement) - defm SVST2Q_VNUM : StructStore<"svst2q_vnum[_{d}]", "vPcl2", "aarch64_sve_st2q", [VerifyRuntimeMode]>; - defm SVST3Q_VNUM : StructStore<"svst3q_vnum[_{d}]", "vPcl3", "aarch64_sve_st3q", [VerifyRuntimeMode]>; - defm SVST4Q_VNUM : StructStore<"svst4q_vnum[_{d}]", "vPcl4", "aarch64_sve_st4q", [VerifyRuntimeMode]>; + def SVST2Q_VNUM : SInst<"svst2q_vnum[_{d}]", "vPcl2", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st2q", [IsStructStore, VerifyRuntimeMode]>; + def SVST3Q_VNUM : SInst<"svst3q_vnum[_{d}]", "vPcl3", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st3q", [IsStructStore, VerifyRuntimeMode]>; + def SVST4Q_VNUM : SInst<"svst4q_vnum[_{d}]", "vPcl4", "csilUcUsUiUlhfdbm", MergeNone, "aarch64_sve_st4q", [IsStructStore, VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// @@ -505,7 +497,7 @@ def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ" def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">; def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">; def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index", "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">; -} // let SVETargetGuard = "sve" +} 
//////////////////////////////////////////////////////////////////////////////// // Address calculations @@ -515,7 +507,7 @@ def SVADRB : SInst<"svadrb[_{0}base]_[{2}]offset", "uud", "ilUiUl", MergeNone, " def SVADRH : SInst<"svadrh[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrh">; def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">; def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">; -} // let SVETargetGuard = "sve" +} //////////////////////////////////////////////////////////////////////////////// // Scalar to vector @@ -550,9 +542,9 @@ defm SVNEG : SInstZPZ<"svneg", "csil", "aarch64_sve_neg">; //------------------------------------------------------------------------------ multiclass SInstZPZZ<string name, string types, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> { - def _M : SInst<name # "[_{d}]", "dPdd", types, MergeOp1, m_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; - def _X : SInst<name # "[_{d}]", "dPdd", types, MergeAny, x_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; - def _Z : SInst<name # "[_{d}]", "dPdd", types, MergeZero, m_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; + def _M : SInst<name # "[_{d}]", "dPdd", types, MergeOp1, m_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; + def _X : SInst<name # "[_{d}]", "dPdd", types, MergeAny, x_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; + def _Z : SInst<name # "[_{d}]", "dPdd", types, MergeZero, m_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; def _N_M : SInst<name # "[_n_{d}]", "dPda", types, MergeOp1, m_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; def _N_X : SInst<name # "[_n_{d}]", "dPda", types, MergeAny, x_intrinsic, !listconcat(flags, [VerifyRuntimeMode])>; @@ -579,9 +571,9 @@ defm SVSUBR : SInstZPZZ<"svsubr", "csilUcUsUiUl", "aarch64_sve_subr", "aarch6 
//------------------------------------------------------------------------------ multiclass SInstZPZZZ<string name, string types, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> { - def _M : SInst<name # "[_{d}]", "dPddd", types, MergeOp1, m_intrinsic, flags>; - def _X : SInst<name # "[_{d}]", "dPddd", types, MergeAny, x_intrinsic, flags>; - def _Z : SInst<name # "[_{d}]", "dPddd", types, MergeZero, m_intrinsic, flags>; + def _M : SInst<name # "[_{d}]", "dPddd", types, MergeOp1, m_intrinsic, flags>; + def _X : SInst<name # "[_{d}]", "dPddd", types, MergeAny, x_intrinsic, flags>; + def _Z : SInst<name # "[_{d}]", "dPddd", types, MergeZero, m_intrinsic, flags>; def _N_M : SInst<name # "[_n_{d}]", "dPdda", types, MergeOp1, m_intrinsic, flags>; def _N_X : SInst<name # "[_n_{d}]", "dPdda", types, MergeAny, x_intrinsic, flags>; @@ -595,12 +587,12 @@ defm SVMSB : SInstZPZZZ<"svmsb", "csilUcUsUiUl", "aarch64_sve_msb", "aarch64_sve //------------------------------------------------------------------------------ -def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot", [VerifyRuntimeMode]>; -def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot", [VerifyRuntimeMode]>; -def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x", [VerifyRuntimeMode]>; -def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [VerifyRuntimeMode]>; -def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x", [VerifyRuntimeMode]>; -def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [VerifyRuntimeMode]>; +def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot", [VerifyRuntimeMode]>; +def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot", [VerifyRuntimeMode]>; +def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x", 
[VerifyRuntimeMode]>; +def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [VerifyRuntimeMode]>; +def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x", [VerifyRuntimeMode]>; +def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [VerifyRuntimeMode]>; def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot", [VerifyRuntimeMode]>; def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot", [VerifyRuntimeMode]>; @@ -615,10 +607,10 @@ def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarc //////////////////////////////////////////////////////////////////////////////// // Logical operations -defm SVAND : SInstZPZZ<"svand", "csilUcUsUiUl", "aarch64_sve_and", "aarch64_sve_and_u">; -defm SVBIC : SInstZPZZ<"svbic", "csilUcUsUiUl", "aarch64_sve_bic", "aarch64_sve_bic_u">; -defm SVEOR : SInstZPZZ<"sveor", "csilUcUsUiUl", "aarch64_sve_eor", "aarch64_sve_eor_u">; -defm SVORR : SInstZPZZ<"svorr", "csilUcUsUiUl", "aarch64_sve_orr", "aarch64_sve_orr_u">; +defm SVAND : SInstZPZZ<"svand", "csilUcUsUiUl", "aarch64_sve_and", "aarch64_sve_and_u">; +defm SVBIC : SInstZPZZ<"svbic", "csilUcUsUiUl", "aarch64_sve_bic", "aarch64_sve_bic_u">; +defm SVEOR : SInstZPZZ<"sveor", "csilUcUsUiUl", "aarch64_sve_eor", "aarch64_sve_eor_u">; +defm SVORR : SInstZPZZ<"svorr", "csilUcUsUiUl", "aarch64_sve_orr", "aarch64_sve_orr_u">; defm SVCNOT : SInstZPZ<"svcnot", "csilUcUsUiUl", "aarch64_sve_cnot">; defm SVNOT : SInstZPZ<"svnot", "csilUcUsUiUl", "aarch64_sve_not">; @@ -648,9 +640,9 @@ defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">; defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">; defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; -def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, 
ImmCheckShiftRight, 1>]>; -def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRight, 1>]>; def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_insr", [VerifyRuntimeMode]>; @@ -785,19 +777,20 @@ defm SVRINTP : SInstZPZ<"svrintp", "hfd", "aarch64_sve_frintp">; defm SVRINTX : SInstZPZ<"svrintx", "hfd", "aarch64_sve_frintx">; defm SVRINTZ : SInstZPZ<"svrintz", "hfd", "aarch64_sve_frintz">; defm SVSQRT : SInstZPZ<"svsqrt", "hfd", "aarch64_sve_fsqrt">; -def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x", [VerifyRuntimeMode]>{ - let SVETargetGuard = "sve"; - let SMETargetGuard = "sme2,ssve-fexpa"; + +let SVETargetGuard = "sve", SMETargetGuard = "sme2,ssve-fexpa" in { +def SVEXPA : SInst<"svexpa[_{d}]", "du", "hfd", MergeNone, "aarch64_sve_fexpa_x", [VerifyRuntimeMode]>; } + let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>; def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">; def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">; } -def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale", [VerifyRuntimeMode]>; -def SVSCALE_X : 
SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale", [VerifyRuntimeMode]>; -def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale", [VerifyRuntimeMode]>; +def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale", [VerifyRuntimeMode]>; +def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale", [VerifyRuntimeMode]>; +def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale", [VerifyRuntimeMode]>; def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale", [VerifyRuntimeMode]>; def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, "aarch64_sve_fscale", [VerifyRuntimeMode]>; @@ -819,8 +812,7 @@ def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fc def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [VerifyRuntimeMode], [ImmCheck<4, ImmCheckComplexRotAll90>]>; def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [VerifyRuntimeMode], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, - ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [VerifyRuntimeMode], [ImmCheck<2, 
ImmCheckLaneIndex, 1>]>; @@ -834,8 +826,9 @@ def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frs // Floating-point reductions let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { -def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; +def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; } + def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv", [VerifyRuntimeMode]>; def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv", [VerifyRuntimeMode]>; def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv", [VerifyRuntimeMode]>; @@ -961,15 +954,15 @@ def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "$$Pd", "f", MergeOp1, "aar } let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -defm SVCVTLT_F32_F16 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">; -defm SVCVTLT_F64_F32 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">; +defm SVCVTLT_F32_F16 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">; +defm SVCVTLT_F64_F32 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">; -defm SVCVTX_F32_F64 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">; +defm SVCVTX_F32_F64 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">; -def SVCVTNT_F16_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>; -def SVCVTNT_F32_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>; -// SVCVTNT_X_F16_F32 : Implemented as macro by SveEmitter.cpp -// SVCVTNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp +def SVCVTNT_F16_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, 
"aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, VerifyRuntimeMode]>; +def SVCVTNT_F32_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>; +// SVCVTNT_X_F16_F32 : Implemented as macro by SveEmitter.cpp +// SVCVTNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp def SVCVTXNT_F32_F64 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, VerifyRuntimeMode]>; // SVCVTXNT_X_F32_F64 : Implemented as macro by SveEmitter.cpp @@ -982,39 +975,39 @@ multiclass SVEPerm<string name, string proto, string i> { def : SInst<name, proto, "csilUcUsUiUlhfdb", MergeNone, i, [VerifyRuntimeMode]>; } -defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">; -defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">; -defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">; -defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">; +defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">; +defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">; +defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">; +defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">; let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { -def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; +def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">; } // Note: svdup_lane is implemented using the intrinsic for TBL to represent a // splat of any possible lane. It is upto LLVM to pick a more efficient // instruction such as DUP (indexed) if the lane index fits the range of the // instruction's immediate. 
-def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; -def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>; -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>; -defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">; -defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">; -def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; -def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>; -def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>; -def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; - -def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>; -def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2", [VerifyRuntimeMode]>; -def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi", [VerifyRuntimeMode]>; -def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi", [VerifyRuntimeMode]>; -def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo", [VerifyRuntimeMode]>; -def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo", [VerifyRuntimeMode]>; -def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1", [VerifyRuntimeMode]>; -def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2", [VerifyRuntimeMode]>; -def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, 
"aarch64_sve_zip1", [VerifyRuntimeMode]>; -def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>; +def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; +def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>; +def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>; +defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">; +defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">; +def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; +def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_sel", [VerifyRuntimeMode]>; +def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>; +def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>; + +def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1", [VerifyRuntimeMode]>; +def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2", [VerifyRuntimeMode]>; +def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi", [VerifyRuntimeMode]>; +def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi", [VerifyRuntimeMode]>; +def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo", [VerifyRuntimeMode]>; +def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo", [VerifyRuntimeMode]>; +def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1", [VerifyRuntimeMode]>; +def SVUZP2 : SInst<"svuzp2[_{d}]", 
"ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2", [VerifyRuntimeMode]>; +def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip1", [VerifyRuntimeMode]>; +def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2", [VerifyRuntimeMode]>; def SVREV_B8 : SInst<"svrev_b8", "PP", "Pc", MergeNone, "aarch64_sve_rev", [VerifyRuntimeMode]>; def SVREV_B16 : SInst<"svrev_b16", "PP", "Pc", MergeNone, "aarch64_sve_rev_b16", [IsOverloadNone, VerifyRuntimeMode]>; @@ -1053,14 +1046,13 @@ def SVZIP2_B64 : SInst<"svzip2_b64", "PPP", "Pc", MergeNone, "aarch64_sve_zip def SVPFALSE : SInst<"svpfalse[_b]", "Pv", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>; def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [VerifyRuntimeMode]>; -def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL, VerifyRuntimeMode]>; - -def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "Pssssssssssssssss", "Pc", MergeNone, "", [VerifyRuntimeMode]>; -def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "Pssssssss", "Ps", MergeNone, "", [VerifyRuntimeMode]>; -def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "Pssss", "Pi", MergeNone, "", [VerifyRuntimeMode]>; -def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone, "", [VerifyRuntimeMode]>; -def SVDUP_N_B : SInst<"svdup[_n]_{d}", "Ps", "PcPsPiPl", MergeNone, "", [VerifyRuntimeMode]>; +def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL, VerifyRuntimeMode]>; +def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "Pssssssssssssssss", "Pc", MergeNone, "", [VerifyRuntimeMode]>; +def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "Pssssssss", "Ps", MergeNone, "", [VerifyRuntimeMode]>; +def SVDUPQ_B32 : SInst<"svdupq[_n]_{d}", "Pssss", "Pi", MergeNone, "", [VerifyRuntimeMode]>; +def SVDUPQ_B64 : SInst<"svdupq[_n]_{d}", "Pss", "Pl", MergeNone, "", [VerifyRuntimeMode]>; +def SVDUP_N_B : 
SInst<"svdup[_n]_{d}", "Ps", "PcPsPiPl", MergeNone, "", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// // Predicate operations @@ -1084,7 +1076,7 @@ def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brk def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z", [VerifyRuntimeMode]>; def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst", [VerifyRuntimeMode]>; -def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext", [VerifyRuntimeMode]>; +def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// // Testing predicates @@ -1097,9 +1089,9 @@ def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ // FFR manipulation let SVETargetGuard = "sve", SMETargetGuard = InvalidMode in { -def SVRDFFR : SInst<"svrdffr", "Pv", "Pc", MergeNone, "", [IsOverloadNone]>; +def SVRDFFR : SInst<"svrdffr", "Pv", "Pc", MergeNone, "", [IsOverloadNone]>; def SVRDFFR_Z : SInst<"svrdffr_z", "PP", "Pc", MergeNone, "", [IsOverloadNone]>; -def SVSETFFR : SInst<"svsetffr", "vv", "", MergeNone, "", [IsOverloadNone]>; +def SVSETFFR : SInst<"svsetffr", "vv", "", MergeNone, "", [IsOverloadNone]>; def SVWRFFR : SInst<"svwrffr", "vP", "Pc", MergeNone, "", [IsOverloadNone]>; } @@ -1116,7 +1108,7 @@ def SVCNTH : SInst<"svcnth", "nv", "", MergeNone, "aarch64_sve_cnth", [IsAppendS def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone, VerifyRuntimeMode]>; def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone, VerifyRuntimeMode]>; -def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>; +def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, 
"aarch64_sve_cntp", [VerifyRuntimeMode]>; def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfdb", MergeNone, "", [VerifyRuntimeMode]>; //////////////////////////////////////////////////////////////////////////////// @@ -1188,13 +1180,13 @@ def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_ } let SVETargetGuard = "sve,i8mm", SMETargetGuard = "sme,i8mm"in { -def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>; -def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>; -def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>; -def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>; +def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>; +def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>; +def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>; +def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>; -def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; -def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } let SVETargetGuard = 
"sve,f32mm", SMETargetGuard = InvalidMode in { @@ -1202,13 +1194,14 @@ def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla } let SVETargetGuard = "sve,f64mm", SMETargetGuard = InvalidMode in { -def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">; -def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1q">; -def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2q">; +def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd", "d", MergeNone, "aarch64_sve_fmmla">; + +def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn1q">; +def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_trn2q">; +def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp1q">; +def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_uzp2q">; +def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip1q">; +def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfdb", MergeNone, "aarch64_sve_zip2q">; } //////////////////////////////////////////////////////////////////////////////// @@ -1244,17 +1237,17 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVSET_2_B : SInst<"svset2[_b]", "22id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; def SVSET_4_B : SInst<"svset4[_b]", "44id", "Pc", MergeNone, "", [IsTupleSet, VerifyRuntimeMode], [ImmCheck<1, 
ImmCheck0_3>]>; - def SVUNDEF_2_B: Inst<"svundef2_b", "2", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>; - def SVUNDEF_4_B: Inst<"svundef4_b", "4", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>; + def SVUNDEF_2_B : Inst<"svundef2_b", "2", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>; + def SVUNDEF_4_B : Inst<"svundef4_b", "4", "Pc", MergeNone, "", [IsUndef, VerifyRuntimeMode], []>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 WhileGE/GT let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; -def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; +def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; def SVWHILEHI_U64 : 
SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>; @@ -1262,14 +1255,14 @@ def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PcPsPiPl", MergeNone, " } let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { - def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2", [VerifyRuntimeMode]>; - def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2", [VerifyRuntimeMode]>; - def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2", [VerifyRuntimeMode]>; - def SVWHILEHS_U64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2", [VerifyRuntimeMode]>; - def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2", [VerifyRuntimeMode]>; - def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2", [VerifyRuntimeMode]>; - def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2", [VerifyRuntimeMode]>; - def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2", [VerifyRuntimeMode]>; + def SVWHILEGE_S64_X2 : SInst<"svwhilege_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege_x2", [VerifyRuntimeMode]>; + def SVWHILEGT_S64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt_x2", [VerifyRuntimeMode]>; + def SVWHILEHI_U64_X2 : SInst<"svwhilegt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehi_x2", [VerifyRuntimeMode]>; + def SVWHILEHS_U64_X2 : 
SInst<"svwhilege_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilehs_x2", [VerifyRuntimeMode]>; + def SVWHILELE_S64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele_x2", [VerifyRuntimeMode]>; + def SVWHILELT_S64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2ll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt_x2", [VerifyRuntimeMode]>; + def SVWHILELO_U64_X2 : SInst<"svwhilelt_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilelo_x2", [VerifyRuntimeMode]>; + def SVWHILELS_U64_X2 : SInst<"svwhilele_{d}[_{1}]_x2", "2nn", "PcPsPiPl", MergeNone, "aarch64_sve_whilels_x2", [VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1320,19 +1313,19 @@ defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_ defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", [VerifyRuntimeMode]>; defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", [VerifyRuntimeMode]>; -def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>; -def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>; -def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh", [VerifyRuntimeMode]>; -def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh", [VerifyRuntimeMode]>; -def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah", [VerifyRuntimeMode]>; -def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [VerifyRuntimeMode]>; +def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>; +def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>; +def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", 
MergeNone, "aarch64_sve_sqdmulh", [VerifyRuntimeMode]>; +def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh", [VerifyRuntimeMode]>; +def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah", [VerifyRuntimeMode]>; +def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [VerifyRuntimeMode]>; -def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>; -def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>; -def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh", [VerifyRuntimeMode]>; -def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh", [VerifyRuntimeMode]>; -def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah", [VerifyRuntimeMode]>; -def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [VerifyRuntimeMode]>; +def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba", [VerifyRuntimeMode]>; +def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [VerifyRuntimeMode]>; +def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh", [VerifyRuntimeMode]>; +def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh", [VerifyRuntimeMode]>; +def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah", [VerifyRuntimeMode]>; +def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [VerifyRuntimeMode]>; def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", 
"dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; @@ -1381,9 +1374,9 @@ defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [ // SVE2 - Widening pairwise arithmetic let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; -def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; -def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; +def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; +def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; +def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp", [VerifyRuntimeMode]>; def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp", [VerifyRuntimeMode]>; def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp", [VerifyRuntimeMode]>; @@ -1441,11 +1434,9 @@ let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckComplexRot90_270>]>; def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckComplexRot90_270>]>; def SVCMLA : SInst<"svcmla[_{d}]", "ddddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, - ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_LANE_X : 
SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "ddddi", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, - ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1517,10 +1508,10 @@ def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_ def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllt", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllt", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone, "", [VerifyRuntimeMode]>; -def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone, "", [VerifyRuntimeMode]>; -def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone, "", [VerifyRuntimeMode]>; -def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone, "", [VerifyRuntimeMode]>; +def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone, "", [VerifyRuntimeMode]>; +def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", 
"UsUiUl", MergeNone, "", [VerifyRuntimeMode]>; def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; @@ -1546,14 +1537,14 @@ def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi", "il", MergeNone, " // SVE2 - Narrowing DSP operations let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [VerifyRuntimeMode]>; -def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [VerifyRuntimeMode]>; -def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [VerifyRuntimeMode]>; -def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [VerifyRuntimeMode]>; -def SVRSUBHNB : SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [VerifyRuntimeMode]>; -def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [VerifyRuntimeMode]>; -def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [VerifyRuntimeMode]>; -def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [VerifyRuntimeMode]>; +def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [VerifyRuntimeMode]>; +def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [VerifyRuntimeMode]>; +def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [VerifyRuntimeMode]>; +def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [VerifyRuntimeMode]>; +def SVRSUBHNB : 
SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [VerifyRuntimeMode]>; +def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [VerifyRuntimeMode]>; +def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [VerifyRuntimeMode]>; +def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [VerifyRuntimeMode]>; def SVADDHNB_N : SInst<"svaddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [VerifyRuntimeMode]>; def SVADDHNT_N : SInst<"svaddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [VerifyRuntimeMode]>; @@ -1564,35 +1555,35 @@ def SVRSUBHNT_N : SInst<"svrsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aa def SVSUBHNB_N : SInst<"svsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [VerifyRuntimeMode]>; def SVSUBHNT_N : SInst<"svsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [VerifyRuntimeMode]>; -def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqshrunb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def 
SVQRSHRNB_S : SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; - -def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", 
MergeNone, "aarch64_sve_sqshrunb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_S : SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [VerifyRuntimeMode], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + +def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [VerifyRuntimeMode], 
[ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Unary narrowing operations let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb", [VerifyRuntimeMode]>; -def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb", [VerifyRuntimeMode]>; -def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb", [VerifyRuntimeMode]>; +def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb", [VerifyRuntimeMode]>; +def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb", [VerifyRuntimeMode]>; +def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb", [VerifyRuntimeMode]>; -def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt", [VerifyRuntimeMode]>; -def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt", [VerifyRuntimeMode]>; -def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt", [VerifyRuntimeMode]>; +def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt", [VerifyRuntimeMode]>; +def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt", [VerifyRuntimeMode]>; +def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt", [VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1737,16 +1728,17 @@ def SVEORBT : SInst<"sveorbt[_{d}]", "dddd", "csilUcUsUiUl", Mer def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", 
[VerifyRuntimeMode]>; def SVEORTB : SInst<"sveortb[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [VerifyRuntimeMode]>; def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [VerifyRuntimeMode]>; -def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul", [VerifyRuntimeMode]>; -def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul", [VerifyRuntimeMode]>; -def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; -def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; -def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>; -def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>; -def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; -def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; -def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>; -def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>; + +def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul", [VerifyRuntimeMode]>; +def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul", [VerifyRuntimeMode]>; +def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>; +def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [VerifyRuntimeMode]>; 
+def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone, "", [VerifyRuntimeMode]>; +def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>; +def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair", [VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1754,43 +1746,42 @@ def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", Mer let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [VerifyRuntimeMode], [ImmCheck<4, ImmCheckComplexRotAll90>, - ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [VerifyRuntimeMode], [ImmCheck<4, ImmCheckComplexRotAll90>, ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Floating-point widening multiply-accumulate let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>; -def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>; -def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt", [VerifyRuntimeMode]>; -def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalt", 
[VerifyRuntimeMode]>; -def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb", [VerifyRuntimeMode]>; -def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb", [VerifyRuntimeMode]>; -def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt", [VerifyRuntimeMode]>; -def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt", [VerifyRuntimeMode]>; -def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>; +def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>; +def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt", [VerifyRuntimeMode]>; +def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalt", [VerifyRuntimeMode]>; +def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb", [VerifyRuntimeMode]>; +def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb", [VerifyRuntimeMode]>; +def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", 
MergeNone, "aarch64_sve_fmlslb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt", [VerifyRuntimeMode]>; +def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt", [VerifyRuntimeMode]>; +def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Floating-point integer binary logarithm let SVETargetGuard = "sve2", SMETargetGuard = "sme" in { -def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb", [VerifyRuntimeMode]>; -def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb", [VerifyRuntimeMode]>; -def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb", [VerifyRuntimeMode]>; +def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb", [VerifyRuntimeMode]>; +def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb", [VerifyRuntimeMode]>; +def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb", [VerifyRuntimeMode]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Vector Histogram count let SVETargetGuard = "sve2", SMETargetGuard = InvalidMode in { -def SVHISTCNT : SInst<"svhistcnt[_{d}]_z", "uPdd", "ilUiUl", MergeNone, "aarch64_sve_histcnt">; -def SVHISTSEG : SInst<"svhistseg[_{d}]", "udd", "cUc", MergeNone, "aarch64_sve_histseg">; +def SVHISTCNT : SInst<"svhistcnt[_{d}]_z", "uPdd", "ilUiUl", MergeNone, "aarch64_sve_histcnt">; +def SVHISTSEG : SInst<"svhistseg[_{d}]", "udd", "cUc", MergeNone, "aarch64_sve_histseg">; } //////////////////////////////////////////////////////////////////////////////// @@ -1825,10 +1816,10 @@ def SVTBX : 
SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfdb", MergeNone, "aarch //////////////////////////////////////////////////////////////////////////////// // SVE2 - Lookup table let SVETargetGuard = "sve2,lut", SMETargetGuard = "sme2,lut" in { - def SVLUTI2_B : SInst<"svluti2_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; + def SVLUTI2_B : SInst<"svluti2_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; def SVLUTI2_H : SInst<"svluti2_lane[_{d}]", "dd[i", "sUshb", MergeNone, "aarch64_sve_luti2_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_7>]>; - def SVLUTI4_B : SInst<"svluti4_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>; + def SVLUTI4_B : SInst<"svluti4_lane[_{d}]", "dd[i", "cUc", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_1>]>; def SVLUTI4_H : SInst<"svluti4_lane[_{d}]", "dd[i", "sUshb", MergeNone, "aarch64_sve_luti4_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; def SVLUTI4_x2 : SInst<"svluti4_lane[_{d}_x2]", "d2.d[i", "sUshb", MergeNone, "aarch64_sve_luti4_lane_x2", [VerifyRuntimeMode], [ImmCheck<2, ImmCheck0_3>]>; @@ -1851,7 +1842,7 @@ def SVPMULLT_PAIR_N_U64 : SInst<"svpmullt_pair[_n_{d}]", "dda", "Ul", MergeNone, } let SVETargetGuard = "sve-sha3", SMETargetGuard = "sve-sha3,sme2p1" in { -def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>; +def SVRAX1 : SInst<"svrax1[_{d}]", "ddd", "lUl", MergeNone, "aarch64_sve_rax1", [IsOverloadNone, VerifyRuntimeMode]>; } let SVETargetGuard = "sve2-sm4", SMETargetGuard = InvalidMode in { @@ -1894,7 +1885,7 @@ def SVFMINQV : SInst<"svminqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_ } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { -def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", 
MergeNone, "aarch64_sve_pext", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; +def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; def SVPSEL_COUNT_ALIAS_B : SInst<"svpsel_lane_c8", "}}Pm", "Pc", MergeNone, "", [VerifyRuntimeMode], []>; @@ -1902,31 +1893,31 @@ def SVPSEL_COUNT_ALIAS_H : SInst<"svpsel_lane_c16", "}}Pm", "Ps", MergeNone, "", def SVPSEL_COUNT_ALIAS_S : SInst<"svpsel_lane_c32", "}}Pm", "Pi", MergeNone, "", [VerifyRuntimeMode], []>; def SVPSEL_COUNT_ALIAS_D : SInst<"svpsel_lane_c64", "}}Pm", "Pl", MergeNone, "", [VerifyRuntimeMode], []>; -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", 
[IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilele_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<2, ImmCheck2_4_Mul2>]>; } multiclass MultiVecLoad<string i> { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], 
MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # 
"_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -1938,20 +1929,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { } multiclass MultiVecStore<string i> { - def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, 
"aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; - def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; + def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">; - def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; + def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, 
"aarch64_sve_" # i # "_pn_x4">; def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">; @@ -1966,11 +1957,12 @@ let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { def SVDOT_X2_S : SInst<"svdot[_{d}_{2}]", "ddhh", "i", MergeNone, "aarch64_sve_sdot_x2", [VerifyRuntimeMode], []>; def SVDOT_X2_U : SInst<"svdot[_{d}_{2}]", "ddhh", "Ui", MergeNone, "aarch64_sve_udot_x2", [VerifyRuntimeMode], []>; def SVDOT_X2_F : SInst<"svdot[_{d}_{2}]", "ddhh", "f", MergeNone, "aarch64_sve_fdot_x2", [VerifyRuntimeMode], []>; + def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_3>]>; -def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>; +def SVFCLAMP : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>; } let SVETargetGuard = "sve2p1|sme", SMETargetGuard = "sve2p1|sme" in { @@ -1981,27 +1973,28 @@ defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlmbhfd", "aarch64_sve_revd">; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { - def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, VerifyRuntimeMode], []>; + def SVPTRUE_COUNT : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, 
VerifyRuntimeMode], []>; + def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<1, ImmCheck2_4_Mul2>]>; def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, VerifyRuntimeMode]>; - - def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } let SVETargetGuard = "sve2,sve-b16b16", SMETargetGuard = "sme2,sve-b16b16" in { -defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u", [VerifyRuntimeMode]>; -defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u", [VerifyRuntimeMode]>; -defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u", [VerifyRuntimeMode]>; -defm SVMAXNM_BF : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u", [VerifyRuntimeMode]>; -defm SVMINNM_BF : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u", [VerifyRuntimeMode]>; -defm SVMAX_BF : SInstZPZZ<"svmax", "b", "aarch64_sve_fmax", "aarch64_sve_fmax_u", [VerifyRuntimeMode]>; -defm SVMIN_BF : SInstZPZZ<"svmin", "b", "aarch64_sve_fmin", "aarch64_sve_fmin_u", [VerifyRuntimeMode]>; -defm SVMLA_BF : SInstZPZZZ<"svmla", "b", "aarch64_sve_fmla", "aarch64_sve_fmla_u", [VerifyRuntimeMode]>; -defm SVMLS_BF : SInstZPZZZ<"svmls", "b", "aarch64_sve_fmls", "aarch64_sve_fmls_u", [VerifyRuntimeMode]>; -def SVMLA_LANE_BF : SInst<"svmla_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmla_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE_BF : SInst<"svmls_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmls_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE_BF : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVFCLAMP_BF : 
SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>; +defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u", [VerifyRuntimeMode]>; +defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u", [VerifyRuntimeMode]>; +defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u", [VerifyRuntimeMode]>; +defm SVMAXNM_BF : SInstZPZZ<"svmaxnm", "b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u", [VerifyRuntimeMode]>; +defm SVMINNM_BF : SInstZPZZ<"svminnm", "b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u", [VerifyRuntimeMode]>; +defm SVMAX_BF : SInstZPZZ<"svmax", "b", "aarch64_sve_fmax", "aarch64_sve_fmax_u", [VerifyRuntimeMode]>; +defm SVMIN_BF : SInstZPZZ<"svmin", "b", "aarch64_sve_fmin", "aarch64_sve_fmin_u", [VerifyRuntimeMode]>; +defm SVMLA_BF : SInstZPZZZ<"svmla", "b", "aarch64_sve_fmla", "aarch64_sve_fmla_u", [VerifyRuntimeMode]>; +defm SVMLS_BF : SInstZPZZZ<"svmls", "b", "aarch64_sve_fmls", "aarch64_sve_fmls_u", [VerifyRuntimeMode]>; + +def SVMLA_LANE_BF : SInst<"svmla_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmla_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE_BF : SInst<"svmls_lane[_{d}]", "ddddi", "b", MergeNone, "aarch64_sve_fmls_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE_BF : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; + +def SVFCLAMP_BF : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [VerifyRuntimeMode], []>; } // SME2 @@ -2059,13 +2052,13 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVSCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x2", [IsStreaming], []>; - def SVUCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", 
"UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x2", [IsStreaming], []>; - def SVFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x2", [IsStreaming], []>; + def SVSCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x2", [IsStreaming], []>; + def SVUCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x2", [IsStreaming], []>; + def SVFCLAMP_X2 : SInst<"svclamp[_single_{d}_x2]", "22dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x2", [IsStreaming], []>; - def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x4", [IsStreaming], []>; - def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x4", [IsStreaming], []>; - def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; + def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x4", [IsStreaming], []>; + def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x4", [IsStreaming], []>; + def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; } multiclass BfSingleMultiVector<string name> { @@ -2090,13 +2083,13 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,sve-b16b16"in { } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { -// == ADD (vectors) == + // == ADD (vectors) == def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>; def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>; // 2-way and 4-way selects - def SVSEL_X2 : 
SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>; - def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>; + def SVSEL_X2 : SInst<"svsel[_{d}_x2]", "2}22", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x2", [IsStreaming], []>; + def SVSEL_X4 : SInst<"svsel[_{d}_x4]", "4}44", "cUcsUsiUilUlmbhfd", MergeNone, "aarch64_sve_sel_x4", [IsStreaming], []>; // SRSHL / URSHL def SVSRSHL_SINGLE_X2 : SInst<"svrshl[_single_{d}_x2]", "22d", "csil", MergeNone, "aarch64_sve_srshl_single_x2", [IsStreaming], []>; @@ -2109,8 +2102,8 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVSRSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "csil", MergeNone, "aarch64_sve_srshl_x4", [IsStreaming], []>; def SVURSHL_X4 : SInst<"svrshl[_{d}_x4]", "444", "UcUsUiUl", MergeNone, "aarch64_sve_urshl_x4", [IsStreaming], []>; - def SVQRSHRN_X4 : SInst<"svqrshrn[_n]_{0}[_{d}_x4]", "q4i", "il", MergeNone, "aarch64_sve_sqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; - def SVUQRSHRN_X4 : SInst<"svqrshrn[_n]_{0}[_{d}_x4]", "b4i", "UiUl", MergeNone, "aarch64_sve_uqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + def SVQRSHRN_X4 : SInst<"svqrshrn[_n]_{0}[_{d}_x4]", "q4i", "il", MergeNone, "aarch64_sve_sqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; + def SVUQRSHRN_X4 : SInst<"svqrshrn[_n]_{0}[_{d}_x4]", "b4i", "UiUl", MergeNone, "aarch64_sve_uqrshrn_x4", [IsStreaming], [ImmCheck<1, ImmCheckShiftRight, 0>]>; // SQRSHR / UQRSHR def SVQRSHR_X2 : SInst<"svqrshr[_n]_{0}[_{d}_x2]", "h2i", "i", MergeNone, "aarch64_sve_sqrshr_x2", [IsStreaming], [ImmCheck<1, ImmCheck1_16>]>; @@ -2144,32 +2137,29 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { } let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { - // SQRSHRN / UQRSHRN - def SVQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i", 
MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; - def SVUQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; - - // SQRSHRUN - def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; + def SVQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "h2i", "i", MergeNone, "aarch64_sve_sqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; + def SVUQRSHRN_X2 : SInst<"svqrshrn[_n]_{0}[_{d}_x2]", "e2i", "Ui", MergeNone, "aarch64_sve_uqrshrn_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; + def SVSQRSHRUN_X2 : SInst<"svqrshrun[_n]_{0}[_{d}_x2]", "e2i", "i", MergeNone, "aarch64_sve_sqrshrun_x2", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck1_16>]>; } let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { - // ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 def SVZIPQ1 : SInst<"svzipq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq1", [VerifyRuntimeMode], []>; def SVZIPQ2 : SInst<"svzipq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_zipq2", [VerifyRuntimeMode], []>; def SVUZPQ1 : SInst<"svuzpq1[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq1", [VerifyRuntimeMode], []>; def SVUZPQ2 : SInst<"svuzpq2[_{d}]", "ddd", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_uzpq2", [VerifyRuntimeMode], []>; - // TBLQ, TBXQ - def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tblq", [VerifyRuntimeMode]>; + + def SVTBLQ : SInst<"svtblq[_{d}]", "ddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tblq", [VerifyRuntimeMode]>; def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_tbxq", [VerifyRuntimeMode]>; - // EXTQ + def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfdm", MergeNone, "aarch64_sve_extq", [VerifyRuntimeMode], 
[ImmCheck<2, ImmCheckLaneIndex, 0>]>; - // PMOV + // Move to Pred multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > { def _LANE : Inst<name # "_lane[_{d}]", "Pdi", types, MergeNone, intrinsic, flags, [ImmCheck<1, immCh>]>; def _LANE_ZERO : SInst<name # "[_{d}]", "Pd", types, MergeNone, intrinsic # "_zero", flags, []>; } + defm SVPMOV_B_TO_PRED : PMOV_TO_PRED<"svpmov", "cUc", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_0>; defm SVPMOV_H_TO_PRED : PMOV_TO_PRED<"svpmov", "sUs", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_1>; defm SVPMOV_S_TO_PRED : PMOV_TO_PRED<"svpmov", "iUi", "aarch64_sve_pmov_to_pred_lane", [VerifyRuntimeMode], ImmCheck0_3>; @@ -2180,6 +2170,7 @@ let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { def _M : SInst<name # "_lane[_{d}]", "ddPi", types, MergeOp1, intrinsic # "_merging", flags, [ImmCheck<2, immCh>]>; def _Z : SInst<name # "_{d}_z", "dP", types, MergeNone, intrinsic # "_zeroing", flags, []>; } + def SVPMOV_TO_VEC_LANE_B : SInst<"svpmov_{d}_z", "dP", "cUc", MergeNone, "aarch64_sve_pmov_to_vector_lane_zeroing", [VerifyRuntimeMode], []>; defm SVPMOV_TO_VEC_LANE_H : PMOV_TO_VEC<"svpmov", "sUs", "aarch64_sve_pmov_to_vector_lane", [VerifyRuntimeMode], ImmCheck1_1>; defm SVPMOV_TO_VEC_LANE_S : PMOV_TO_VEC<"svpmov", "iUi", "aarch64_sve_pmov_to_vector_lane", [VerifyRuntimeMode], ImmCheck1_3>; @@ -2187,18 +2178,17 @@ let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { } let SVETargetGuard = "sve2p1|sme2p1", SMETargetGuard = "sve2p1|sme2p1" in { - // DUPQ - def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUcm", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>; - def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUshb", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>; - def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", 
"ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; - def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; + def SVDUP_LANEQ_B : SInst<"svdup_laneq[_{d}]", "ddi", "cUcm", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>; + def SVDUP_LANEQ_H : SInst<"svdup_laneq[_{d}]", "ddi", "sUshb", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>; + def SVDUP_LANEQ_S : SInst<"svdup_laneq[_{d}]", "ddi", "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>; + def SVDUP_LANEQ_D : SInst<"svdup_laneq[_{d}]", "ddi", "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>; } // // Multi-vector convert to/from floating-point. // let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "h2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>; + def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "h2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>; def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>; def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; @@ -2220,7 +2210,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in { // Multi-vector floating-point convert from single-precision to interleaved half-precision/BFloat16 // let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVCVTN_F16_X2 : SInst<"svcvtn_f16[_f32_x2]", "h2", "f", MergeNone, "aarch64_sve_fcvtn_x2", [IsStreaming],[]>; + def SVCVTN_F16_X2 : SInst<"svcvtn_f16[_f32_x2]", "h2", "f", MergeNone, "aarch64_sve_fcvtn_x2", [IsStreaming],[]>; def SVCVTN_BF16_X2 : 
SInst<"svcvtn_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvtn_x2", [IsOverloadNone, IsStreaming],[]>; } @@ -2235,36 +2225,36 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme-f16f16" in { // Multi-vector saturating extract narrow // let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVQCVT_S16_S32_X2 : SInst<"svqcvt_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvt_x2", [IsStreaming], []>; + def SVQCVT_S16_S32_X2 : SInst<"svqcvt_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvt_x2", [IsStreaming], []>; def SVQCVT_U16_U32_X2 : SInst<"svqcvt_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvt_x2", [IsStreaming], []>; - def SVQCVT_U16_S32_X2 : SInst<"svqcvt_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtu_x2", [IsStreaming], []>; + def SVQCVT_U16_S32_X2 : SInst<"svqcvt_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtu_x2", [IsStreaming], []>; - def SVQCVT_S8_S32_X4 : SInst<"svqcvt_s8[_{d}_x4]", "q4.d", "i", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; + def SVQCVT_S8_S32_X4 : SInst<"svqcvt_s8[_{d}_x4]", "q4.d", "i", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; def SVQCVT_U8_U32_X4 : SInst<"svqcvt_u8[_{d}_x4]", "b4.d", "Ui", MergeNone, "aarch64_sve_uqcvt_x4", [IsStreaming], []>; - def SVQCVT_U8_S32_X4 : SInst<"svqcvt_u8[_{d}_x4]", "b4.d", "i", MergeNone, "aarch64_sve_sqcvtu_x4", [IsStreaming], []>; + def SVQCVT_U8_S32_X4 : SInst<"svqcvt_u8[_{d}_x4]", "b4.d", "i", MergeNone, "aarch64_sve_sqcvtu_x4", [IsStreaming], []>; - def SVQCVT_S16_S64_X4 : SInst<"svqcvt_s16[_{d}_x4]", "q4.d", "l", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; + def SVQCVT_S16_S64_X4 : SInst<"svqcvt_s16[_{d}_x4]", "q4.d", "l", MergeNone, "aarch64_sve_sqcvt_x4", [IsStreaming], []>; def SVQCVT_U16_U64_X4 : SInst<"svqcvt_u16[_{d}_x4]", "b4.d", "Ul", MergeNone, "aarch64_sve_uqcvt_x4", [IsStreaming], []>; - def SVQCVT_U16_S64_X4 : SInst<"svqcvt_u16[_{d}_x4]", "b4.d", "l", MergeNone, 
"aarch64_sve_sqcvtu_x4", [IsStreaming], []>; + def SVQCVT_U16_S64_X4 : SInst<"svqcvt_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtu_x4", [IsStreaming], []>; } // // Multi-vector saturating extract narrow and interleave // let SVETargetGuard = "sve2p1|sme2", SMETargetGuard = "sve2p1|sme2" in { - def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [VerifyRuntimeMode], []>; + def SVQCVTN_S16_S32_X2 : SInst<"svqcvtn_s16[_{d}_x2]", "h2.d", "i", MergeNone, "aarch64_sve_sqcvtn_x2", [VerifyRuntimeMode], []>; def SVQCVTN_U16_U32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x2", [VerifyRuntimeMode], []>; - def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtun_x2", [VerifyRuntimeMode], []>; + def SVQCVTN_U16_S32_X2 : SInst<"svqcvtn_u16[_{d}_x2]", "e2.d", "i", MergeNone, "aarch64_sve_sqcvtun_x2", [VerifyRuntimeMode], []>; } let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { - def SVQCVTN_S8_S32_X4 : SInst<"svqcvtn_s8[_{d}_x4]", "q4.d", "i", MergeNone, "aarch64_sve_sqcvtn_x4", [IsStreaming], []>; + def SVQCVTN_S8_S32_X4 : SInst<"svqcvtn_s8[_{d}_x4]", "q4.d", "i", MergeNone, "aarch64_sve_sqcvtn_x4", [IsStreaming], []>; def SVQCVTN_U8_U32_X4 : SInst<"svqcvtn_u8[_{d}_x4]", "b4.d", "Ui", MergeNone, "aarch64_sve_uqcvtn_x4", [IsStreaming], []>; - def SVQCVTN_U8_S32_X4 : SInst<"svqcvtn_u8[_{d}_x4]", "b4.d", "i", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; + def SVQCVTN_U8_S32_X4 : SInst<"svqcvtn_u8[_{d}_x4]", "b4.d", "i", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; - def SVQCVTN_S16_S64_X4 : SInst<"svqcvtn_s16[_{d}_x4]", "q4.d", "l", MergeNone, "aarch64_sve_sqcvtn_x4", [IsStreaming], []>; + def SVQCVTN_S16_S64_X4 : SInst<"svqcvtn_s16[_{d}_x4]", "q4.d", "l", MergeNone, "aarch64_sve_sqcvtn_x4", [IsStreaming], []>; def SVQCVTN_U16_U64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "Ul", MergeNone, 
"aarch64_sve_uqcvtn_x4", [IsStreaming], []>; - def SVQCVTN_U16_S64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; + def SVQCVTN_U16_S64_X4 : SInst<"svqcvtn_u16[_{d}_x4]", "b4.d", "l", MergeNone, "aarch64_sve_sqcvtun_x4", [IsStreaming], []>; } // @@ -2298,6 +2288,7 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { // Multi-vector scaling def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>; def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>; + def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>; def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; diff --git a/clang/include/clang/CIR/CIRGenerator.h b/clang/include/clang/CIR/CIRGenerator.h index dd48eec..5ea1146 100644 --- a/clang/include/clang/CIR/CIRGenerator.h +++ b/clang/include/clang/CIR/CIRGenerator.h @@ -79,7 +79,10 @@ public: void HandleTranslationUnit(clang::ASTContext &astContext) override; void HandleInlineFunctionDefinition(clang::FunctionDecl *d) override; void HandleTagDeclDefinition(clang::TagDecl *d) override; + void HandleTagDeclRequiredDefinition(const clang::TagDecl *D) override; + void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *D) override; void CompleteTentativeDefinition(clang::VarDecl *d) override; + void HandleVTable(clang::CXXRecordDecl *rd) override; mlir::ModuleOp getModule() const; mlir::MLIRContext &getMLIRContext() { return *mlirContext; }; diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 5ef5b60..72841a1 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -291,6 +291,9 @@ def CIR_ConstantOp : 
CIR_Op<"const", [ return ptrAttr.isNullValue(); return false; } + + template <typename T> + T getValueAttr() { return mlir::dyn_cast<T>(getValue()); } }]; let hasFolder = 1; diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td index edd21b5..a258df7 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td @@ -282,6 +282,35 @@ def CIR_PointerType : CIR_Type<"Pointer", "ptr", [ } //===----------------------------------------------------------------------===// +// CIR_VPtrType +//===----------------------------------------------------------------------===// + +def CIR_VPtrType : CIR_Type<"VPtr", "vptr", [ + DeclareTypeInterfaceMethods<DataLayoutTypeInterface> +]> { + let summary = "CIR type that is used for the vptr member of C++ objects"; + let description = [{ + `cir.vptr` is a special type used as the type for the vptr member of a C++ + object. This avoids using arbitrary pointer types to declare vptr values + and allows stronger type-based checking for operations that use or provide + access to the vptr. + + This type will be the element type of the 'vptr' member of structures that + require a vtable pointer. A pointer to this type is returned by the + `cir.vtable.address_point` and `cir.vtable.get_vptr` operations, and this + pointer may be passed to the `cir.vtable.get_virtual_fn_addr` operation to + get the address of a virtual function pointer. + + The pointer may also be cast to other pointer types in order to perform + pointer arithmetic based on information encoded in the AST layout to get + the offset from a pointer to a dynamic object to the base object pointer, + the base object offset value from the vtable, or the type information + entry for an object. + TODO: We should have special operations to do that too. 
+ }]; +} + +//===----------------------------------------------------------------------===// // BoolType //===----------------------------------------------------------------------===// @@ -635,7 +664,7 @@ def CIRRecordType : Type< def CIR_AnyType : AnyTypeOf<[ CIR_VoidType, CIR_BoolType, CIR_ArrayType, CIR_VectorType, CIR_IntType, CIR_AnyFloatType, CIR_PointerType, CIR_FuncType, CIR_RecordType, - CIR_ComplexType + CIR_ComplexType, CIR_VPtrType ]>; #endif // CLANG_CIR_DIALECT_IR_CIRTYPES_TD diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index adc7b5f..27dd181 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -223,6 +223,7 @@ struct MissingFeatures { static bool lowerAggregateLoadStore() { return false; } static bool lowerModeOptLevel() { return false; } static bool maybeHandleStaticInExternC() { return false; } + static bool mergeAllConstants() { return false; } static bool metaDataNode() { return false; } static bool moduleNameHash() { return false; } static bool msabi() { return false; } diff --git a/clang/lib/AST/ByteCode/DynamicAllocator.cpp b/clang/lib/AST/ByteCode/DynamicAllocator.cpp index 169250c..9b8b664 100644 --- a/clang/lib/AST/ByteCode/DynamicAllocator.cpp +++ b/clang/lib/AST/ByteCode/DynamicAllocator.cpp @@ -13,6 +13,25 @@ using namespace clang; using namespace clang::interp; +// FIXME: There is a peculiar problem with the way we track pointers +// to blocks and the way we allocate dynamic memory. +// +// When we have code like this: +// while (true) { +// char *buffer = new char[1024]; +// delete[] buffer; +// } +// +// We have a local variable 'buffer' pointing to the heap allocated memory. +// When deallocating the memory via delete[], that local variable still +// points to the memory, which means we will create a DeadBlock for it and move +// it over to that block, essentially duplicating the allocation. Moving +// the data is also slow. 
+// +// However, when we actually try to access the allocation after it has been +// freed, we need the block to still exist (alive or dead) so we can tell +// that it's a dynamic allocation. + DynamicAllocator::~DynamicAllocator() { cleanup(); } void DynamicAllocator::cleanup() { diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 993b64b..20b7c44 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16858,31 +16858,31 @@ static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result) { CheckMemoryLeaks(Info); } -static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, +static bool FastEvaluateAsRValue(const Expr *Exp, APValue &Result, const ASTContext &Ctx, bool &IsConst) { // Fast-path evaluations of integer literals, since we sometimes see files // containing vast quantities of these. if (const auto *L = dyn_cast<IntegerLiteral>(Exp)) { - Result.Val = APValue(APSInt(L->getValue(), - L->getType()->isUnsignedIntegerType())); + Result = + APValue(APSInt(L->getValue(), L->getType()->isUnsignedIntegerType())); IsConst = true; return true; } if (const auto *L = dyn_cast<CXXBoolLiteralExpr>(Exp)) { - Result.Val = APValue(APSInt(APInt(1, L->getValue()))); + Result = APValue(APSInt(APInt(1, L->getValue()))); IsConst = true; return true; } if (const auto *FL = dyn_cast<FloatingLiteral>(Exp)) { - Result.Val = APValue(FL->getValue()); + Result = APValue(FL->getValue()); IsConst = true; return true; } if (const auto *L = dyn_cast<CharacterLiteral>(Exp)) { - Result.Val = APValue(Ctx.MakeIntValue(L->getValue(), L->getType())); + Result = APValue(Ctx.MakeIntValue(L->getValue(), L->getType())); IsConst = true; return true; } @@ -16891,7 +16891,7 @@ static bool FastEvaluateAsRValue(const Expr *Exp, Expr::EvalResult &Result, if (CE->hasAPValueResult()) { APValue APV = CE->getAPValueResult(); if (!APV.isLValue()) { - Result.Val = std::move(APV); + Result = std::move(APV); IsConst = true; 
return true; } @@ -16921,7 +16921,7 @@ static bool EvaluateAsRValue(const Expr *E, Expr::EvalResult &Result, const ASTContext &Ctx, EvalInfo &Info) { assert(!E->isValueDependent()); bool IsConst; - if (FastEvaluateAsRValue(E, Result, Ctx, IsConst)) + if (FastEvaluateAsRValue(E, Result.Val, Ctx, IsConst)) return IsConst; return EvaluateAsRValue(Info, E, Result.Val); @@ -17078,7 +17078,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx, assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); bool IsConst; - if (FastEvaluateAsRValue(this, Result, Ctx, IsConst) && Result.Val.hasValue()) + if (FastEvaluateAsRValue(this, Result.Val, Ctx, IsConst) && + Result.Val.hasValue()) return true; ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateAsConstantExpr"); @@ -17293,7 +17294,7 @@ void Expr::EvaluateForOverflow(const ASTContext &Ctx) const { ExprTimeTraceScope TimeScope(this, Ctx, "EvaluateForOverflow"); bool IsConst; EvalResult EVResult; - if (!FastEvaluateAsRValue(this, EVResult, Ctx, IsConst)) { + if (!FastEvaluateAsRValue(this, EVResult.Val, Ctx, IsConst)) { EvalInfo Info(Ctx, EVResult, EvalInfo::EM_IgnoreSideEffects); Info.CheckingForUndefinedBehavior = true; (void)::EvaluateAsRValue(Info, this, EVResult.Val); @@ -17312,8 +17313,7 @@ bool Expr::EvalResult::isGlobalLValue() const { /// comma, etc // CheckICE - This function does the fundamental ICE checking: the returned -// ICEDiag contains an ICEKind indicating whether the expression is an ICE, -// and a (possibly null) SourceLocation indicating the location of the problem. +// ICEDiag contains an ICEKind indicating whether the expression is an ICE. // // Note that to reduce code duplication, this helper does no evaluation // itself; the caller checks whether the expression is evaluatable, and @@ -17777,60 +17777,51 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { /// Evaluate an expression as a C++11 integral constant expression. 
static bool EvaluateCPlusPlus11IntegralConstantExpr(const ASTContext &Ctx, const Expr *E, - llvm::APSInt *Value, - SourceLocation *Loc) { - if (!E->getType()->isIntegralOrUnscopedEnumerationType()) { - if (Loc) *Loc = E->getExprLoc(); + llvm::APSInt *Value) { + if (!E->getType()->isIntegralOrUnscopedEnumerationType()) return false; - } APValue Result; - if (!E->isCXX11ConstantExpr(Ctx, &Result, Loc)) + if (!E->isCXX11ConstantExpr(Ctx, &Result)) return false; - if (!Result.isInt()) { - if (Loc) *Loc = E->getExprLoc(); + if (!Result.isInt()) return false; - } if (Value) *Value = Result.getInt(); return true; } -bool Expr::isIntegerConstantExpr(const ASTContext &Ctx, - SourceLocation *Loc) const { +bool Expr::isIntegerConstantExpr(const ASTContext &Ctx) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); ExprTimeTraceScope TimeScope(this, Ctx, "isIntegerConstantExpr"); if (Ctx.getLangOpts().CPlusPlus11) - return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, nullptr, Loc); + return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, nullptr); ICEDiag D = CheckICE(this, Ctx); - if (D.Kind != IK_ICE) { - if (Loc) *Loc = D.Loc; + if (D.Kind != IK_ICE) return false; - } return true; } std::optional<llvm::APSInt> -Expr::getIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc) const { +Expr::getIntegerConstantExpr(const ASTContext &Ctx) const { if (isValueDependent()) { // Expression evaluator can't succeed on a dependent expression. 
return std::nullopt; } - APSInt Value; - if (Ctx.getLangOpts().CPlusPlus11) { - if (EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc)) + APSInt Value; + if (EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value)) return Value; return std::nullopt; } - if (!isIntegerConstantExpr(Ctx, Loc)) + if (!isIntegerConstantExpr(Ctx)) return std::nullopt; // The only possible side-effects here are due to UB discovered in the @@ -17855,8 +17846,7 @@ bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const { return CheckICE(this, Ctx).Kind == IK_ICE; } -bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, - SourceLocation *Loc) const { +bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); @@ -17864,28 +17854,27 @@ bool Expr::isCXX11ConstantExpr(const ASTContext &Ctx, APValue *Result, // issues. assert(Ctx.getLangOpts().CPlusPlus); + bool IsConst; + APValue Scratch; + if (FastEvaluateAsRValue(this, Scratch, Ctx, IsConst) && Scratch.hasValue()) { + if (Result) + *Result = Scratch; + return true; + } + // Build evaluation settings. Expr::EvalStatus Status; SmallVector<PartialDiagnosticAt, 8> Diags; Status.Diag = &Diags; EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantExpression); - APValue Scratch; bool IsConstExpr = ::EvaluateAsRValue(Info, this, Result ? *Result : Scratch) && // FIXME: We don't produce a diagnostic for this, but the callers that // call us on arbitrary full-expressions should generally not care. Info.discardCleanups() && !Status.HasSideEffects; - if (!Diags.empty()) { - IsConstExpr = false; - if (Loc) *Loc = Diags[0].first; - } else if (!IsConstExpr) { - // FIXME: This shouldn't happen. 
- if (Loc) *Loc = getExprLoc(); - } - - return IsConstExpr; + return IsConstExpr && Diags.empty(); } bool Expr::EvaluateWithSubstitution(APValue &Value, ASTContext &Ctx, diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp index 987f894..688efe4 100644 --- a/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/clang/lib/Analysis/RetainSummaryManager.cpp @@ -147,8 +147,7 @@ static bool isSubclass(const Decl *D, static bool isExactClass(const Decl *D, StringRef ClassName) { using namespace ast_matchers; - DeclarationMatcher sameClassM = - cxxRecordDecl(hasName(std::string(ClassName))); + DeclarationMatcher sameClassM = cxxRecordDecl(hasName(ClassName)); return !(match(sameClassM, *D, D->getASTContext()).empty()); } diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index 6bec2fa..75fdf38 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ b/clang/lib/Basic/Targets/ARM.cpp @@ -133,19 +133,24 @@ void ARMTargetInfo::setArchInfo(llvm::ARM::ArchKind Kind) { } void ARMTargetInfo::setAtomic() { - // when triple does not specify a sub arch, - // then we are not using inline atomics - bool ShouldUseInlineAtomic = - (ArchISA == llvm::ARM::ISAKind::ARM && ArchVersion >= 6) || - (ArchISA == llvm::ARM::ISAKind::THUMB && ArchVersion >= 7); - // Cortex M does not support 8 byte atomics, while general Thumb2 does. if (ArchProfile == llvm::ARM::ProfileKind::M) { + // M-class only ever supports 32-bit atomics. Cortex-M0 doesn't have + // any atomics. MaxAtomicPromoteWidth = 32; - if (ShouldUseInlineAtomic) + if (ArchVersion >= 7) MaxAtomicInlineWidth = 32; } else { + // A-class targets have up to 64-bit atomics. + // + // On Linux, 64-bit atomics are always available through kernel helpers + // (which are lock-free). Otherwise, atomics are available on v6 or later. + // + // (Thumb doesn't matter; for Thumbv6, we just use a library call which + // switches out of Thumb mode.) 
+ // + // This should match setMaxAtomicSizeInBitsSupported() in the backend. MaxAtomicPromoteWidth = 64; - if (ShouldUseInlineAtomic) + if (getTriple().getOS() == llvm::Triple::Linux || ArchVersion >= 6) MaxAtomicInlineWidth = 64; } } diff --git a/clang/lib/CIR/CodeGen/Address.h b/clang/lib/CIR/CodeGen/Address.h index 6f76c3e..6c927e9 100644 --- a/clang/lib/CIR/CodeGen/Address.h +++ b/clang/lib/CIR/CodeGen/Address.h @@ -101,6 +101,17 @@ public: } clang::CharUnits getAlignment() const { return alignment; } + + /// Get the operation which defines this address. + mlir::Operation *getDefiningOp() const { + if (!isValid()) + return nullptr; + return getPointer().getDefiningOp(); + } + + template <typename OpTy> OpTy getDefiningOp() const { + return mlir::dyn_cast_or_null<OpTy>(getDefiningOp()); + } }; } // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenClass.cpp b/clang/lib/CIR/CodeGen/CIRGenClass.cpp index 50cca0e..72b9d17 100644 --- a/clang/lib/CIR/CodeGen/CIRGenClass.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenClass.cpp @@ -349,12 +349,16 @@ void CIRGenFunction::emitCXXAggrConstructorCall( // doesn't happen, but it's not clear that it's worth it. // Optimize for a constant count. - auto constantCount = dyn_cast<cir::ConstantOp>(numElements.getDefiningOp()); - if (constantCount) { - auto constIntAttr = mlir::dyn_cast<cir::IntAttr>(constantCount.getValue()); - // Just skip out if the constant count is zero. - if (constIntAttr && constIntAttr.getUInt() == 0) - return; + if (auto constantCount = numElements.getDefiningOp<cir::ConstantOp>()) { + if (auto constIntAttr = constantCount.getValueAttr<cir::IntAttr>()) { + // Just skip out if the constant count is zero. + if (constIntAttr.getUInt() == 0) + return; + // Otherwise, emit the check. + } + + if (constantCount.use_empty()) + constantCount.erase(); } else { // Otherwise, emit the check. 
cgm.errorNYI(e->getSourceRange(), "dynamic-length array expression"); @@ -417,9 +421,6 @@ void CIRGenFunction::emitCXXAggrConstructorCall( builder.create<cir::YieldOp>(loc); }); } - - if (constantCount.use_empty()) - constantCount.erase(); } void CIRGenFunction::emitDelegateCXXConstructorCall( diff --git a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp index 9e8eaa5..9cdbebe 100644 --- a/clang/lib/CIR/CodeGen/CIRGenDecl.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenDecl.cpp @@ -24,7 +24,8 @@ using namespace clang; using namespace clang::CIRGen; CIRGenFunction::AutoVarEmission -CIRGenFunction::emitAutoVarAlloca(const VarDecl &d) { +CIRGenFunction::emitAutoVarAlloca(const VarDecl &d, + mlir::OpBuilder::InsertPoint ip) { QualType ty = d.getType(); if (ty.getAddressSpace() != LangAS::Default) cgm.errorNYI(d.getSourceRange(), "emitAutoVarAlloca: address space"); @@ -50,7 +51,8 @@ CIRGenFunction::emitAutoVarAlloca(const VarDecl &d) { // A normal fixed sized variable becomes an alloca in the entry block, mlir::Type allocaTy = convertTypeForMem(ty); // Create the temp alloca and declare variable using it. 
- address = createTempAlloca(allocaTy, alignment, loc, d.getName()); + address = createTempAlloca(allocaTy, alignment, loc, d.getName(), + /*arraySize=*/nullptr, /*alloca=*/nullptr, ip); declare(address.getPointer(), &d, ty, getLoc(d.getSourceRange()), alignment); emission.Addr = address; @@ -651,6 +653,27 @@ void CIRGenFunction::emitNullabilityCheck(LValue lhs, mlir::Value rhs, assert(!cir::MissingFeatures::sanitizers()); } +namespace { +struct DestroyObject final : EHScopeStack::Cleanup { + DestroyObject(Address addr, QualType type, + CIRGenFunction::Destroyer *destroyer) + : addr(addr), type(type), destroyer(destroyer) {} + + Address addr; + QualType type; + CIRGenFunction::Destroyer *destroyer; + + void emit(CIRGenFunction &cgf) override { + cgf.emitDestroy(addr, type, destroyer); + } +}; +} // namespace + +void CIRGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, + QualType type, Destroyer *destroyer) { + pushFullExprCleanup<DestroyObject>(cleanupKind, addr, type, destroyer); +} + /// Destroys all the elements of the given array, beginning from last to first. /// The array cannot be zero-length. /// @@ -738,22 +761,6 @@ CIRGenFunction::getDestroyer(QualType::DestructionKind kind) { llvm_unreachable("Unknown DestructionKind"); } -namespace { -struct DestroyObject final : EHScopeStack::Cleanup { - DestroyObject(Address addr, QualType type, - CIRGenFunction::Destroyer *destroyer) - : addr(addr), type(type), destroyer(destroyer) {} - - Address addr; - QualType type; - CIRGenFunction::Destroyer *destroyer; - - void emit(CIRGenFunction &cgf) override { - cgf.emitDestroy(addr, type, destroyer); - } -}; -} // namespace - /// Enter a destroy cleanup for the given local variable. 
void CIRGenFunction::emitAutoVarTypeCleanup( const CIRGenFunction::AutoVarEmission &emission, diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index d267504..761d8d3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -721,8 +721,8 @@ static const Expr *getSimpleArrayDecayOperand(const Expr *e) { static cir::IntAttr getConstantIndexOrNull(mlir::Value idx) { // TODO(cir): should we consider using MLIRs IndexType instead of IntegerAttr? - if (auto constantOp = dyn_cast<cir::ConstantOp>(idx.getDefiningOp())) - return mlir::dyn_cast<cir::IntAttr>(constantOp.getValue()); + if (auto constantOp = idx.getDefiningOp<cir::ConstantOp>()) + return constantOp.getValueAttr<cir::IntAttr>(); return {}; } @@ -730,8 +730,7 @@ static CharUnits getArrayElementAlign(CharUnits arrayAlign, mlir::Value idx, CharUnits eltSize) { // If we have a constant index, we can use the exact offset of the // element we're accessing. - const cir::IntAttr constantIdx = getConstantIndexOrNull(idx); - if (constantIdx) { + if (const cir::IntAttr constantIdx = getConstantIndexOrNull(idx)) { const CharUnits offset = constantIdx.getValue().getZExtValue() * eltSize; return arrayAlign.alignmentAtOffset(offset); } @@ -1105,6 +1104,151 @@ void CIRGenFunction::emitAnyExprToMem(const Expr *e, Address location, llvm_unreachable("bad evaluation kind"); } +static Address createReferenceTemporary(CIRGenFunction &cgf, + const MaterializeTemporaryExpr *m, + const Expr *inner) { + // TODO(cir): cgf.getTargetHooks(); + switch (m->getStorageDuration()) { + case SD_FullExpression: + case SD_Automatic: { + QualType ty = inner->getType(); + + assert(!cir::MissingFeatures::mergeAllConstants()); + + // The temporary memory should be created in the same scope as the extending + // declaration of the temporary materialization expression. 
+ cir::AllocaOp extDeclAlloca; + if (const ValueDecl *extDecl = m->getExtendingDecl()) { + auto extDeclAddrIter = cgf.localDeclMap.find(extDecl); + if (extDeclAddrIter != cgf.localDeclMap.end()) + extDeclAlloca = extDeclAddrIter->second.getDefiningOp<cir::AllocaOp>(); + } + mlir::OpBuilder::InsertPoint ip; + if (extDeclAlloca) + ip = {extDeclAlloca->getBlock(), extDeclAlloca->getIterator()}; + return cgf.createMemTemp(ty, cgf.getLoc(m->getSourceRange()), + cgf.getCounterRefTmpAsString(), /*alloca=*/nullptr, + ip); + } + case SD_Thread: + case SD_Static: { + cgf.cgm.errorNYI( + m->getSourceRange(), + "createReferenceTemporary: static/thread storage duration"); + return Address::invalid(); + } + + case SD_Dynamic: + llvm_unreachable("temporary can't have dynamic storage duration"); + } + llvm_unreachable("unknown storage duration"); +} + +static void pushTemporaryCleanup(CIRGenFunction &cgf, + const MaterializeTemporaryExpr *m, + const Expr *e, Address referenceTemporary) { + // Objective-C++ ARC: + // If we are binding a reference to a temporary that has ownership, we + // need to perform retain/release operations on the temporary. + // + // FIXME(ogcg): This should be looking at e, not m. + if (m->getType().getObjCLifetime()) { + cgf.cgm.errorNYI(e->getSourceRange(), "pushTemporaryCleanup: ObjCLifetime"); + return; + } + + CXXDestructorDecl *referenceTemporaryDtor = nullptr; + if (const clang::RecordType *rt = e->getType() + ->getBaseElementTypeUnsafe() + ->getAs<clang::RecordType>()) { + // Get the destructor for the reference temporary. + auto *classDecl = cast<CXXRecordDecl>(rt->getDecl()); + if (!classDecl->hasTrivialDestructor()) + referenceTemporaryDtor = classDecl->getDestructor(); + } + + if (!referenceTemporaryDtor) + return; + + // Call the destructor for the temporary. 
+ switch (m->getStorageDuration()) { + case SD_Static: + case SD_Thread: + cgf.cgm.errorNYI(e->getSourceRange(), + "pushTemporaryCleanup: static/thread storage duration"); + return; + + case SD_FullExpression: + cgf.pushDestroy(NormalAndEHCleanup, referenceTemporary, e->getType(), + CIRGenFunction::destroyCXXObject); + break; + + case SD_Automatic: + cgf.cgm.errorNYI(e->getSourceRange(), + "pushTemporaryCleanup: automatic storage duration"); + break; + + case SD_Dynamic: + llvm_unreachable("temporary cannot have dynamic storage duration"); + } +} + +LValue CIRGenFunction::emitMaterializeTemporaryExpr( + const MaterializeTemporaryExpr *m) { + const Expr *e = m->getSubExpr(); + + assert((!m->getExtendingDecl() || !isa<VarDecl>(m->getExtendingDecl()) || + !cast<VarDecl>(m->getExtendingDecl())->isARCPseudoStrong()) && + "Reference should never be pseudo-strong!"); + + // FIXME: ideally this would use emitAnyExprToMem, however, we cannot do so + // as that will cause the lifetime adjustment to be lost for ARC + auto ownership = m->getType().getObjCLifetime(); + if (ownership != Qualifiers::OCL_None && + ownership != Qualifiers::OCL_ExplicitNone) { + cgm.errorNYI(e->getSourceRange(), + "emitMaterializeTemporaryExpr: ObjCLifetime"); + return {}; + } + + SmallVector<const Expr *, 2> commaLHSs; + SmallVector<SubobjectAdjustment, 2> adjustments; + e = e->skipRValueSubobjectAdjustments(commaLHSs, adjustments); + + for (const Expr *ignored : commaLHSs) + emitIgnoredExpr(ignored); + + if (isa<OpaqueValueExpr>(e)) { + cgm.errorNYI(e->getSourceRange(), + "emitMaterializeTemporaryExpr: OpaqueValueExpr"); + return {}; + } + + // Create and initialize the reference temporary. 
+ Address object = createReferenceTemporary(*this, m, e); + + if (auto var = object.getPointer().getDefiningOp<cir::GlobalOp>()) { + // TODO(cir): add something akin to stripPointerCasts() to ptr above + cgm.errorNYI(e->getSourceRange(), "emitMaterializeTemporaryExpr: GlobalOp"); + return {}; + } else { + assert(!cir::MissingFeatures::emitLifetimeMarkers()); + emitAnyExprToMem(e, object, Qualifiers(), /*isInitializer=*/true); + } + pushTemporaryCleanup(*this, m, e, object); + + // Perform derived-to-base casts and/or field accesses, to get from the + // temporary object we created (and, potentially, for which we extended + // the lifetime) to the subobject we're binding the reference to. + if (!adjustments.empty()) { + cgm.errorNYI(e->getSourceRange(), + "emitMaterializeTemporaryExpr: Adjustments"); + return {}; + } + + return makeAddrLValue(object, m->getType(), AlignmentSource::Decl); +} + LValue CIRGenFunction::emitCompoundLiteralLValue(const CompoundLiteralExpr *e) { if (e->isFileScope()) { cgm.errorNYI(e->getSourceRange(), "emitCompoundLiteralLValue: FileScope"); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index f62be49..32c1c1a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -48,8 +48,8 @@ struct BinOpInfo { /// Check if the binop can result in integer overflow. bool mayHaveIntegerOverflow() const { // Without constant input, we can't rule out overflow. 
- auto lhsci = dyn_cast<cir::ConstantOp>(lhs.getDefiningOp()); - auto rhsci = dyn_cast<cir::ConstantOp>(rhs.getDefiningOp()); + auto lhsci = lhs.getDefiningOp<cir::ConstantOp>(); + auto rhsci = rhs.getDefiningOp<cir::ConstantOp>(); if (!lhsci || !rhsci) return true; @@ -626,6 +626,7 @@ public: mlir::Value VisitCXXThisExpr(CXXThisExpr *te) { return cgf.loadCXXThis(); } + mlir::Value VisitExprWithCleanups(ExprWithCleanups *e); mlir::Value VisitCXXNewExpr(const CXXNewExpr *e) { return cgf.emitCXXNewExpr(e); } @@ -1217,6 +1218,29 @@ mlir::Value ScalarExprEmitter::emitCompoundAssign( return emitLoadOfLValue(lhs, e->getExprLoc()); } +mlir::Value ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *e) { + mlir::Location scopeLoc = cgf.getLoc(e->getSourceRange()); + mlir::OpBuilder &builder = cgf.builder; + + auto scope = cir::ScopeOp::create( + builder, scopeLoc, + /*scopeBuilder=*/ + [&](mlir::OpBuilder &b, mlir::Type &yieldTy, mlir::Location loc) { + CIRGenFunction::LexicalScope lexScope{cgf, loc, + builder.getInsertionBlock()}; + mlir::Value scopeYieldVal = Visit(e->getSubExpr()); + if (scopeYieldVal) { + // Defend against dominance problems caused by jumps out of expression + // evaluation through the shared cleanup block. + lexScope.forceCleanup(); + cir::YieldOp::create(builder, loc, scopeYieldVal); + yieldTy = scopeYieldVal.getType(); + } + }); + + return scope.getNumResults() > 0 ? 
scope->getResult(0) : nullptr; +} + } // namespace LValue diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 0c9bc38..3ed1e30 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -800,6 +800,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { case Expr::CXXDynamicCastExprClass: case Expr::ImplicitCastExprClass: return emitCastLValue(cast<CastExpr>(e)); + case Expr::MaterializeTemporaryExprClass: + return emitMaterializeTemporaryExpr(cast<MaterializeTemporaryExpr>(e)); } } @@ -810,6 +812,10 @@ static std::string getVersionedTmpName(llvm::StringRef name, unsigned cnt) { return std::string(out.str()); } +std::string CIRGenFunction::getCounterRefTmpAsString() { + return getVersionedTmpName("ref.tmp", counterRefTmp++); +} + std::string CIRGenFunction::getCounterAggTmpAsString() { return getVersionedTmpName("agg.tmp", counterAggTmp++); } diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index f9c8636..68d54bb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -325,7 +325,9 @@ public: }; /// Hold counters for incrementally naming temporaries + unsigned counterRefTmp = 0; unsigned counterAggTmp = 0; + std::string getCounterRefTmpAsString(); std::string getCounterAggTmpAsString(); /// Helpers to convert Clang's SourceLocation to a MLIR Location. @@ -604,6 +606,19 @@ public: void popCleanupBlocks(size_t oldCleanupStackDepth); void popCleanupBlock(); + /// Push a cleanup to be run at the end of the current full-expression. Safe + /// against the possibility that we're currently inside a + /// conditionally-evaluated expression. + template <class T, class... As> + void pushFullExprCleanup(CleanupKind kind, As... a) { + // If we're not in a conditional branch, or if none of the + // arguments requires saving, then use the unconditional cleanup. 
+ if (!isInConditionalBranch()) + return ehStack.pushCleanup<T>(kind, a...); + + cgm.errorNYI("pushFullExprCleanup in conditional branch"); + } + /// Enters a new scope for capturing cleanups, all of which /// will be executed once the scope is exited. class RunCleanupsScope { @@ -619,6 +634,7 @@ public: protected: CIRGenFunction &cgf; + public: /// Enter a new cleanup scope. explicit RunCleanupsScope(CIRGenFunction &cgf) : performCleanup(true), cgf(cgf) { @@ -801,6 +817,9 @@ public: static Destroyer destroyCXXObject; + void pushDestroy(CleanupKind kind, Address addr, QualType type, + Destroyer *destroyer); + Destroyer *getDestroyer(clang::QualType::DestructionKind kind); /// ---------------------- @@ -858,7 +877,8 @@ public: Address emitArrayToPointerDecay(const Expr *array); - AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d); + AutoVarEmission emitAutoVarAlloca(const clang::VarDecl &d, + mlir::OpBuilder::InsertPoint ip = {}); /// Emit code and set up symbol table for a variable declaration with auto, /// register, or no storage class specifier. 
These turn into simple stack @@ -1138,6 +1158,8 @@ public: const clang::FieldDecl *field, llvm::StringRef fieldName); + LValue emitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *e); + LValue emitMemberExpr(const MemberExpr *e); /// Given an expression with a pointer type, emit the value and compute our @@ -1377,6 +1399,7 @@ public: mlir::Location beginLoc; mlir::Value varValue; std::string name; + QualType baseType; llvm::SmallVector<mlir::Value> bounds; }; // Gets the collection of info required to lower and OpenACC clause or cache diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp index 49ff124..32095cb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp @@ -119,7 +119,7 @@ CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { if (const auto *memExpr = dyn_cast<MemberExpr>(curVarExpr)) return {exprLoc, emitMemberExpr(memExpr).getPointer(), exprString, - std::move(bounds)}; + curVarExpr->getType(), std::move(bounds)}; // Sema has made sure that only 4 types of things can get here, array // subscript, array section, member expr, or DRE to a var decl (or the @@ -127,5 +127,5 @@ CIRGenFunction::getOpenACCDataOperandInfo(const Expr *e) { // right. 
const auto *dre = cast<DeclRefExpr>(curVarExpr); return {exprLoc, emitDeclRefLValue(dre).getPointer(), exprString, - std::move(bounds)}; + curVarExpr->getType(), std::move(bounds)}; } diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp index e45d3b8f..5a6e665 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp @@ -12,6 +12,7 @@ #include <type_traits> +#include "CIRGenCXXABI.h" #include "CIRGenFunction.h" #include "clang/AST/ExprCXX.h" @@ -355,6 +356,110 @@ class OpenACCClauseCIREmitter final } } + template <typename RecipeTy> + RecipeTy getOrCreateRecipe(ASTContext &astCtx, const Expr *varRef, + DeclContext *dc, QualType baseType, + mlir::Value mainOp) { + mlir::ModuleOp mod = + builder.getBlock()->getParent()->getParentOfType<mlir::ModuleOp>(); + + std::string recipeName; + { + llvm::raw_string_ostream stream(recipeName); + if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) { + stream << "privatization_"; + } else if constexpr (std::is_same_v<RecipeTy, + mlir::acc::FirstprivateRecipeOp>) { + stream << "firstprivatization_"; + + } else if constexpr (std::is_same_v<RecipeTy, + mlir::acc::ReductionRecipeOp>) { + stream << "reduction_"; + // We don't have the reduction operation here well enough to know how to + // spell this correctly (+ == 'add', etc), so when we implement + // 'reduction' we have to do that here. 
+ cgf.cgm.errorNYI(varRef->getSourceRange(), + "OpeNACC reduction recipe creation"); + } else { + static_assert(!sizeof(RecipeTy), "Unknown Recipe op kind"); + } + + MangleContext &mc = cgf.cgm.getCXXABI().getMangleContext(); + mc.mangleCanonicalTypeName(baseType, stream); + } + + if (auto recipe = mod.lookupSymbol<RecipeTy>(recipeName)) + return recipe; + + mlir::Location loc = cgf.cgm.getLoc(varRef->getBeginLoc()); + mlir::Location locEnd = cgf.cgm.getLoc(varRef->getEndLoc()); + + mlir::OpBuilder modBuilder(mod.getBodyRegion()); + auto recipe = + RecipeTy::create(modBuilder, loc, recipeName, mainOp.getType()); + + // Magic-up a var-decl so we can use normal init/destruction operations for + // a variable declaration. + VarDecl &tempDecl = *VarDecl::Create( + astCtx, dc, varRef->getBeginLoc(), varRef->getBeginLoc(), + &astCtx.Idents.get("openacc.private.init"), baseType, + astCtx.getTrivialTypeSourceInfo(baseType), SC_Auto); + CIRGenFunction::AutoVarEmission tempDeclEmission{ + CIRGenFunction::AutoVarEmission::invalid()}; + + // Init section. + { + llvm::SmallVector<mlir::Type> argsTys{mainOp.getType()}; + llvm::SmallVector<mlir::Location> argsLocs{loc}; + builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), + argsTys, argsLocs); + builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); + + if constexpr (!std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) { + // We have only implemented 'init' for private, so make this NYI until + // we have explicitly implemented everything. + cgf.cgm.errorNYI(varRef->getSourceRange(), + "OpenACC non-private recipe init"); + } + + tempDeclEmission = + cgf.emitAutoVarAlloca(tempDecl, builder.saveInsertionPoint()); + cgf.emitAutoVarInit(tempDeclEmission); + + mlir::acc::YieldOp::create(builder, locEnd); + } + + // Copy section. 
+ if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp> || + std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) { + // TODO: OpenACC: 'private' doesn't emit this, but for the other two we + // have to figure out what 'copy' means here. + cgf.cgm.errorNYI(varRef->getSourceRange(), + "OpenACC record type privatization copy section"); + } + + // Destroy section (doesn't currently exist). + if (tempDecl.needsDestruction(cgf.getContext())) { + llvm::SmallVector<mlir::Type> argsTys{mainOp.getType()}; + llvm::SmallVector<mlir::Location> argsLocs{loc}; + mlir::Block *block = builder.createBlock(&recipe.getDestroyRegion(), + recipe.getDestroyRegion().end(), + argsTys, argsLocs); + builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); + + mlir::Type elementTy = + mlir::cast<cir::PointerType>(mainOp.getType()).getPointee(); + Address addr{block->getArgument(0), elementTy, + cgf.getContext().getDeclAlign(&tempDecl)}; + cgf.emitDestroy(addr, baseType, + cgf.getDestroyer(QualType::DK_cxx_destructor)); + + mlir::acc::YieldOp::create(builder, locEnd); + } + + return recipe; + } + public: OpenACCClauseCIREmitter(OpTy &operation, CIRGen::CIRGenFunction &cgf, CIRGen::CIRGenBuilderTy &builder, @@ -971,6 +1076,37 @@ public: llvm_unreachable("Unknown construct kind in VisitAttachClause"); } } + + void VisitPrivateClause(const OpenACCPrivateClause &clause) { + if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp, + mlir::acc::LoopOp>) { + for (const Expr *var : clause.getVarList()) { + CIRGenFunction::OpenACCDataOperandInfo opInfo = + cgf.getOpenACCDataOperandInfo(var); + auto privateOp = mlir::acc::PrivateOp::create( + builder, opInfo.beginLoc, opInfo.varValue, /*structured=*/true, + /*implicit=*/false, opInfo.name, opInfo.bounds); + privateOp.setDataClause(mlir::acc::DataClause::acc_private); + + { + mlir::OpBuilder::InsertionGuard guardCase(builder); + auto recipe = getOrCreateRecipe<mlir::acc::PrivateRecipeOp>( + 
cgf.getContext(), var, Decl::castToDeclContext(cgf.curFuncDecl), + opInfo.baseType, privateOp.getResult()); + // TODO: OpenACC: The dialect is going to change in the near future to + // have these be on a different operation, so when that changes, we + // probably need to change these here. + operation.addPrivatization(builder.getContext(), privateOp, recipe); + } + } + } else if constexpr (isCombinedType<OpTy>) { + // Despite this being valid on ParallelOp or SerialOp, combined type + // applies to the 'loop'. + applyToLoopOp(clause); + } else { + llvm_unreachable("Unknown construct kind in VisitPrivateClause"); + } + } }; template <typename OpTy> diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 0c8ff4bd..8b01d41a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -41,7 +41,7 @@ struct CIRRecordLowering final { // member type that ensures correct rounding. 
struct MemberInfo final { CharUnits offset; - enum class InfoKind { Field, Base } kind; + enum class InfoKind { VFPtr, Field, Base } kind; mlir::Type data; union { const FieldDecl *fieldDecl; @@ -87,6 +87,8 @@ struct CIRRecordLowering final { accumulateBitFields(RecordDecl::field_iterator field, RecordDecl::field_iterator fieldEnd); + mlir::Type getVFPtrType(); + bool isAAPCS() const { return astContext.getTargetInfo().getABI().starts_with("aapcs"); } @@ -902,9 +904,14 @@ void CIRRecordLowering::accumulateBases(const CXXRecordDecl *cxxRecordDecl) { void CIRRecordLowering::accumulateVPtrs() { if (astRecordLayout.hasOwnVFPtr()) - cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(), - "accumulateVPtrs: hasOwnVFPtr"); + members.push_back(MemberInfo(CharUnits::Zero(), MemberInfo::InfoKind::VFPtr, + getVFPtrType())); + if (astRecordLayout.hasOwnVBPtr()) cirGenTypes.getCGModule().errorNYI(recordDecl->getSourceRange(), "accumulateVPtrs: hasOwnVBPtr"); } + +mlir::Type CIRRecordLowering::getVFPtrType() { + return cir::VPtrType::get(builder.getContext()); +} diff --git a/clang/lib/CIR/CodeGen/CIRGenerator.cpp b/clang/lib/CIR/CodeGen/CIRGenerator.cpp index 99d6528..b0357d9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenerator.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenerator.cpp @@ -152,9 +152,30 @@ void CIRGenerator::HandleTagDeclDefinition(TagDecl *d) { cgm->errorNYI(d->getSourceRange(), "HandleTagDeclDefinition: OpenMP"); } +void CIRGenerator::HandleTagDeclRequiredDefinition(const TagDecl *D) { + if (diags.hasErrorOccurred()) + return; + + assert(!cir::MissingFeatures::generateDebugInfo()); +} + +void CIRGenerator::HandleCXXStaticMemberVarInstantiation(VarDecl *D) { + if (diags.hasErrorOccurred()) + return; + + cgm->errorNYI(D->getSourceRange(), "HandleCXXStaticMemberVarInstantiation"); +} + void CIRGenerator::CompleteTentativeDefinition(VarDecl *d) { if (diags.hasErrorOccurred()) return; cgm->emitTentativeDefinition(d); } + +void CIRGenerator::HandleVTable(CXXRecordDecl 
*rd) { + if (diags.hasErrorOccurred()) + return; + + cgm->errorNYI(rd->getSourceRange(), "HandleVTable"); +} diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index 1c3a310..263ff15 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -1833,7 +1833,7 @@ LogicalResult cir::GetMemberOp::verify() { OpFoldResult cir::VecCreateOp::fold(FoldAdaptor adaptor) { if (llvm::any_of(getElements(), [](mlir::Value value) { - return !mlir::isa<cir::ConstantOp>(value.getDefiningOp()); + return !value.getDefiningOp<cir::ConstantOp>(); })) return {}; diff --git a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp index 40da5e6..4fecb01 100644 --- a/clang/lib/CIR/Dialect/IR/CIRTypes.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRTypes.cpp @@ -698,6 +698,23 @@ BoolType::getABIAlignment(const ::mlir::DataLayout &dataLayout, } //===----------------------------------------------------------------------===// +// VPtrType Definitions +//===----------------------------------------------------------------------===// + +llvm::TypeSize +VPtrType::getTypeSizeInBits(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + // FIXME: consider size differences under different ABIs + return llvm::TypeSize::getFixed(64); +} + +uint64_t VPtrType::getABIAlignment(const mlir::DataLayout &dataLayout, + mlir::DataLayoutEntryListRef params) const { + // FIXME: consider alignment differences under different ABIs + return 8; +} + +//===----------------------------------------------------------------------===// // ArrayType Definitions //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp index 3b7f08c4..3c6f768 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRSimplify.cpp @@ -97,8 
+97,8 @@ private: // Check whether the region/block contains a cir.const followed by a // cir.yield that yields the value. auto yieldOp = mlir::cast<cir::YieldOp>(onlyBlock.getTerminator()); - auto yieldValueDefOp = mlir::dyn_cast_if_present<cir::ConstantOp>( - yieldOp.getArgs()[0].getDefiningOp()); + auto yieldValueDefOp = + yieldOp.getArgs()[0].getDefiningOp<cir::ConstantOp>(); return yieldValueDefOp && yieldValueDefOp->getBlock() == &onlyBlock; } }; @@ -126,18 +126,13 @@ struct SimplifySelect : public OpRewritePattern<SelectOp> { LogicalResult matchAndRewrite(SelectOp op, PatternRewriter &rewriter) const final { - mlir::Operation *trueValueOp = op.getTrueValue().getDefiningOp(); - mlir::Operation *falseValueOp = op.getFalseValue().getDefiningOp(); - auto trueValueConstOp = - mlir::dyn_cast_if_present<cir::ConstantOp>(trueValueOp); - auto falseValueConstOp = - mlir::dyn_cast_if_present<cir::ConstantOp>(falseValueOp); - if (!trueValueConstOp || !falseValueConstOp) + auto trueValueOp = op.getTrueValue().getDefiningOp<cir::ConstantOp>(); + auto falseValueOp = op.getFalseValue().getDefiningOp<cir::ConstantOp>(); + if (!trueValueOp || !falseValueOp) return mlir::failure(); - auto trueValue = mlir::dyn_cast<cir::BoolAttr>(trueValueConstOp.getValue()); - auto falseValue = - mlir::dyn_cast<cir::BoolAttr>(falseValueConstOp.getValue()); + auto trueValue = trueValueOp.getValueAttr<cir::BoolAttr>(); + auto falseValue = falseValueOp.getValueAttr<cir::BoolAttr>(); if (!trueValue || !falseValue) return mlir::failure(); @@ -265,8 +260,7 @@ struct SimplifyVecSplat : public OpRewritePattern<VecSplatOp> { LogicalResult matchAndRewrite(VecSplatOp op, PatternRewriter &rewriter) const override { mlir::Value splatValue = op.getValue(); - auto constant = - mlir::dyn_cast_if_present<cir::ConstantOp>(splatValue.getDefiningOp()); + auto constant = splatValue.getDefiningOp<cir::ConstantOp>(); if (!constant) return mlir::failure(); diff --git a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp 
b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp index 9264aa6..67bb565 100644 --- a/clang/lib/CIR/FrontendAction/CIRGenAction.cpp +++ b/clang/lib/CIR/FrontendAction/CIRGenAction.cpp @@ -84,6 +84,10 @@ public: return true; } + void HandleCXXStaticMemberVarInstantiation(clang::VarDecl *VD) override { + Gen->HandleCXXStaticMemberVarInstantiation(VD); + } + void HandleInlineFunctionDefinition(FunctionDecl *D) override { Gen->HandleInlineFunctionDefinition(D); } @@ -147,9 +151,15 @@ public: Gen->HandleTagDeclDefinition(D); } + void HandleTagDeclRequiredDefinition(const TagDecl *D) override { + Gen->HandleTagDeclRequiredDefinition(D); + } + void CompleteTentativeDefinition(VarDecl *D) override { Gen->CompleteTentativeDefinition(D); } + + void HandleVTable(CXXRecordDecl *RD) override { Gen->HandleVTable(RD); } }; } // namespace cir diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 957a51a..895872b 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1935,12 +1935,11 @@ mlir::LogicalResult CIRToLLVMSelectOpLowering::matchAndRewrite( cir::SelectOp op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const { auto getConstantBool = [](mlir::Value value) -> cir::BoolAttr { - auto definingOp = - mlir::dyn_cast_if_present<cir::ConstantOp>(value.getDefiningOp()); + auto definingOp = value.getDefiningOp<cir::ConstantOp>(); if (!definingOp) return {}; - auto constValue = mlir::dyn_cast<cir::BoolAttr>(definingOp.getValue()); + auto constValue = definingOp.getValueAttr<cir::BoolAttr>(); if (!constValue) return {}; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e1f7ea0..a648bde 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -973,6 +973,9 @@ public: AddrOfSeen = false; return Visit(E->getSubExpr()); } + const Expr *VisitBinaryOperator(const 
clang::BinaryOperator *Op) { + return Op->isCommaOp() ? Visit(Op->getRHS()) : nullptr; + } }; } // end anonymous namespace diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index b6007f5..1ce834d 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -4801,7 +4801,7 @@ void CGDebugInfo::EmitFuncDeclForCallSite(llvm::CallBase *CallOrInvoke, const FunctionDecl *CalleeDecl) { if (!CallOrInvoke) return; - auto *Func = CallOrInvoke->getCalledFunction(); + auto *Func = dyn_cast<llvm::Function>(CallOrInvoke->getCalledOperand()); if (!Func) return; if (Func->getSubprogram()) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 0771c7c..334fcbb 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -3014,12 +3014,12 @@ void tools::addOffloadCompressArgs(const llvm::opt::ArgList &TCArgs, llvm::opt::ArgStringList &CmdArgs) { if (TCArgs.hasFlag(options::OPT_offload_compress, options::OPT_no_offload_compress, false)) - CmdArgs.push_back("-compress"); + CmdArgs.push_back("--compress"); if (TCArgs.hasArg(options::OPT_v)) - CmdArgs.push_back("-verbose"); + CmdArgs.push_back("--verbose"); if (auto *Arg = TCArgs.getLastArg(options::OPT_offload_compression_level_EQ)) CmdArgs.push_back( - TCArgs.MakeArgString(Twine("-compression-level=") + Arg->getValue())); + TCArgs.MakeArgString(Twine("--compression-level=") + Arg->getValue())); } void tools::addMCModel(const Driver &D, const llvm::opt::ArgList &Args, diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 1f0b478..fdfcea8 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -549,22 +549,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA, auto *A = II.getAction(); assert(A->getInputs().size() == 1 && "Device offload action is expected to have a single 
input"); - const char *gpu_arch_str = A->getOffloadingArch(); - assert(gpu_arch_str && + StringRef GpuArch = A->getOffloadingArch(); + assert(!GpuArch.empty() && "Device action expected to have associated a GPU architecture!"); - OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str); - if (II.getType() == types::TY_PP_Asm && - !shouldIncludePTX(Args, gpu_arch_str)) + if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch)) continue; - // We need to pass an Arch of the form "sm_XX" for cubin files and - // "compute_XX" for ptx. - const char *Arch = (II.getType() == types::TY_PP_Asm) - ? OffloadArchToVirtualArchString(gpu_arch) - : gpu_arch_str; - CmdArgs.push_back( - Args.MakeArgString(llvm::Twine("--image=profile=") + Arch + - ",file=" + getToolChain().getInputFilename(II))); + StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf"; + CmdArgs.push_back(Args.MakeArgString( + "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) + + ",file=" + getToolChain().getInputFilename(II))); } for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary)) diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index 137d225..87cc2fc 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -9,6 +9,7 @@ #include "clang/Frontend/FrontendAction.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" #include "clang/AST/DeclGroup.h" #include "clang/Basic/Builtins.h" #include "clang/Basic/DiagnosticOptions.h" @@ -39,6 +40,7 @@ #include "clang/Serialization/ASTReader.h" #include "clang/Serialization/GlobalModuleIndex.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/BuryPointer.h" #include "llvm/Support/ErrorHandling.h" @@ -87,12 +89,25 @@ public: // reducing the granularity and making the output less useful. 
return; } - if (auto *DC = D->getDeclContext(); !DC || !DC->isFileContext()) { + auto *DC = D->getLexicalDeclContext(); + if (!DC || !DC->isFileContext()) { // We choose to work at namespace level to reduce complexity and the // number of cases we care about. return; } + PendingDecls.push_back(D); + if (auto *NS = dyn_cast<NamespaceDecl>(DC)) { + // Add any namespaces we have not seen before. + // Note that we filter out namespaces from DeclRead as it includes too + // all redeclarations and we only want the ones that had other used + // declarations. + while (NS && ProcessedNamespaces.insert(NS).second) { + PendingDecls.push_back(NS); + + NS = dyn_cast<NamespaceDecl>(NS->getLexicalParent()); + } + } } struct Position { @@ -141,23 +156,25 @@ public: OptionalFileEntryRef Ref; }; llvm::DenseMap<const FileEntry *, FileData> FileToRanges; + for (const Decl *D : PendingDecls) { - CharSourceRange R = SM.getExpansionRange(D->getSourceRange()); - if (!R.isValid()) - continue; + for (CharSourceRange R : getRangesToMark(D)) { + if (!R.isValid()) + continue; - auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); - if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { - // Such cases are rare and difficult to handle. - continue; - } + auto *F = SM.getFileEntryForID(SM.getFileID(R.getBegin())); + if (F != SM.getFileEntryForID(SM.getFileID(R.getEnd()))) { + // Such cases are rare and difficult to handle. + continue; + } - auto &Data = FileToRanges[F]; - if (!Data.Ref) - Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); - Data.FromTo.push_back( - {Position::GetBeginSpelling(SM, R), - Position::GetEndSpelling(SM, R, D->getLangOpts())}); + auto &Data = FileToRanges[F]; + if (!Data.Ref) + Data.Ref = SM.getFileEntryRefForID(SM.getFileID(R.getBegin())); + Data.FromTo.push_back( + {Position::GetBeginSpelling(SM, R), + Position::GetEndSpelling(SM, R, D->getLangOpts())}); + } } // To simplify output, merge consecutive and intersecting ranges. 
@@ -188,10 +205,49 @@ public: private: std::vector<const Decl *> PendingDecls; + llvm::SmallPtrSet<const NamespaceDecl *, 0> ProcessedNamespaces; bool IsCollectingDecls = true; const SourceManager &SM; std::unique_ptr<llvm::raw_ostream> OS; + llvm::SmallVector<CharSourceRange, 2> getRangesToMark(const Decl *D) { + auto *NS = dyn_cast<NamespaceDecl>(D); + if (!NS) + return {SM.getExpansionRange(D->getSourceRange())}; + + SourceLocation LBraceLoc; + if (NS->isAnonymousNamespace()) { + LBraceLoc = NS->getLocation(); + } else { + // Start with the location of the identifier. + SourceLocation TokenBeforeLBrace = NS->getLocation(); + if (NS->hasAttrs()) { + for (auto *A : NS->getAttrs()) { + // But attributes may go after it. + if (SM.isBeforeInTranslationUnit(TokenBeforeLBrace, + A->getRange().getEnd())) { + // Give up, the attributes are often coming from macros and we + // cannot skip them reliably. + return {}; + } + } + } + auto &LangOpts = D->getLangOpts(); + // Now skip one token, the next should be the lbrace. + Token Tok; + if (Lexer::getRawToken(TokenBeforeLBrace, Tok, SM, LangOpts, true) || + Lexer::getRawToken(Tok.getEndLoc(), Tok, SM, LangOpts, true) || + Tok.getKind() != tok::l_brace) { + // On error or if we did not find the token we expected, avoid marking + // everything inside the namespace as used. 
+ return {}; + } + LBraceLoc = Tok.getLocation(); + } + return {SM.getExpansionRange(SourceRange(NS->getBeginLoc(), LBraceLoc)), + SM.getExpansionRange(NS->getRBraceLoc())}; + } + void printJson(llvm::ArrayRef<RequiredRanges> Result) { *OS << "{\n"; *OS << R"( "required_ranges": [)" << "\n"; @@ -226,6 +282,8 @@ private: } *OS << " ]\n"; *OS << "}\n"; + + OS->flush(); } }; diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h index 44934ba..b1e5924 100644 --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -385,7 +385,12 @@ __host__ __device__ void __nv_tex_surf_handler(const char *name, T *ptr, #endif // CUDA_VERSION #endif // __cplusplus >= 201103L && CUDA_VERSION >= 9000 #include "surface_indirect_functions.h" +#if CUDA_VERSION < 13000 +// Direct texture fetch functions had been deprecated since CUDA-11. +// The file in CUDA-12 only carried unused texture types, and is no longer +// needed. #include "texture_fetch_functions.h" +#endif // CUDA_VERSION < 13000 #include "texture_indirect_functions.h" // Restore state of __CUDA_ARCH__ and __THROW we had on entry. diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index d255c11..20fdf2d 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14724,9 +14724,10 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { type->isIntegralOrEnumerationType()) { // In C++98, in-class initialization for a static data member must // be an integer constant expression. 
- SourceLocation Loc; - if (!Init->isIntegerConstantExpr(Context, &Loc)) { - Diag(Loc, diag::ext_in_class_initializer_non_constant) + // SourceLocation Loc; + if (!Init->isIntegerConstantExpr(Context)) { + Diag(Init->getExprLoc(), + diag::ext_in_class_initializer_non_constant) << Init->getSourceRange(); } } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index f1a25a7..4ddf8fd 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -868,7 +868,8 @@ VisitUnaryExprOrTypeTraitExpr(const UnaryExprOrTypeTraitExpr *Ex, QualType T = Ex->getTypeOfArgument(); for (ExplodedNode *N : CheckedSet) { - if (Ex->getKind() == UETT_SizeOf) { + if (Ex->getKind() == UETT_SizeOf || Ex->getKind() == UETT_DataSizeOf || + Ex->getKind() == UETT_CountOf) { if (!T->isIncompleteType() && !T->isConstantSizeType()) { assert(T->isVariableArrayType() && "Unknown non-constant-sized type."); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp index 8535384..fe70558 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineCXX.cpp @@ -1227,7 +1227,7 @@ void ExprEngine::VisitAttributedStmt(const AttributedStmt *A, for (const auto *Attr : getSpecificAttrs<CXXAssumeAttr>(A->getAttrs())) { for (ExplodedNode *N : CheckerPreStmt) { - Visit(Attr->getAssumption(), N, EvalSet); + Visit(Attr->getAssumption()->IgnoreParens(), N, EvalSet); } } diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp index 3e68373..0058a0d 100644 --- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp +++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp @@ -1219,6 +1219,16 @@ MemRegionManager::getElementRegion(QualType elementType, NonLoc Idx, const ASTContext &Ctx) { QualType T = Ctx.getCanonicalType(elementType).getUnqualifiedType(); + // The address space must be 
preserved because some target-specific address + // spaces influence the size of the pointer value which is represented by the + // element region. + LangAS AS = elementType.getAddressSpace(); + if (AS != LangAS::Default) { + Qualifiers Quals; + Quals.setAddressSpace(AS); + T = Ctx.getQualifiedType(T, Quals); + } + llvm::FoldingSetNodeID ID; ElementRegion::ProfileRegion(ID, T, Idx, superRegion); diff --git a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp index 37f8b94..d67178c 100644 --- a/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp +++ b/clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp @@ -144,30 +144,8 @@ static void optimizeDiagnosticOpts(DiagnosticOptions &Opts, static void optimizeCWD(CowCompilerInvocation &BuildInvocation, StringRef CWD) { BuildInvocation.getMutFileSystemOpts().WorkingDir.clear(); - if (BuildInvocation.getCodeGenOpts().DwarfVersion) { - // It is necessary to explicitly set the DebugCompilationDir - // to a common directory (e.g. root) if IgnoreCWD is true. - // When IgnoreCWD is true, the module's content should not - // depend on the current working directory. However, if dwarf - // information is needed (when CGOpts.DwarfVersion is - // non-zero), then CGOpts.DebugCompilationDir must be - // populated, because otherwise the current working directory - // will be automatically embedded in the dwarf information in - // the pcm, contradicting the assumption that it is safe to - // ignore the CWD. Thus in such cases, - // CGOpts.DebugCompilationDir is explicitly set to a common - // directory. - // FIXME: It is still excessive to create a copy of - // CodeGenOpts for each module. Since we do not modify the - // CodeGenOpts otherwise per module, the following code - // ends up generating identical CodeGenOpts for each module - // with DebugCompilationDir pointing to the root directory. 
- // We can optimize this away by creating a _single_ copy of - // CodeGenOpts whose DebugCompilationDir points to the root - // directory and reuse it across modules. - BuildInvocation.getMutCodeGenOpts().DebugCompilationDir = - llvm::sys::path::root_path(CWD); - } + BuildInvocation.getMutCodeGenOpts().DebugCompilationDir.clear(); + BuildInvocation.getMutCodeGenOpts().CoverageCompilationDir.clear(); } static std::vector<std::string> splitString(std::string S, char Separator) { diff --git a/clang/test/Analysis/builtin_assume.cpp b/clang/test/Analysis/builtin_assume.cpp index 7158306..29a96c0 100644 --- a/clang/test/Analysis/builtin_assume.cpp +++ b/clang/test/Analysis/builtin_assume.cpp @@ -62,3 +62,16 @@ int using_builtin_assume_has_no_sideeffects(int y) { return y; } + +template <int ...args> +bool issue151529() { + // no-crash + [[assume((true))]]; + // no-crash + [[assume(((args >= 0) && ...))]]; // expected-warning {{pack fold expression is a C++17 extension}} + return ((args >= 0) && ...); // expected-warning {{pack fold expression is a C++17 extension}} +} + +void instantiate_issue151529() { + issue151529<0>(); +} diff --git a/clang/test/Analysis/element-region-address-space.c b/clang/test/Analysis/element-region-address-space.c new file mode 100644 index 0000000..dd70662 --- /dev/null +++ b/clang/test/Analysis/element-region-address-space.c @@ -0,0 +1,11 @@ +// RUN: %clang_analyze_cc1 -triple amdgcn-unknown-unknown \ +// RUN: -analyzer-checker=core -verify %s + +// expected-no-diagnostics +// +// By default, pointers are 64-bits. 
+#define ADDRESS_SPACE_32BITS __attribute__((address_space(3))) + +int test(ADDRESS_SPACE_32BITS int *p, ADDRESS_SPACE_32BITS void *q) { + return p == q; // no-crash +} diff --git a/clang/test/Analysis/engine/gh151711.cpp b/clang/test/Analysis/engine/gh151711.cpp new file mode 100644 index 0000000..a9950a7 --- /dev/null +++ b/clang/test/Analysis/engine/gh151711.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection -verify -x c %s + +void clang_analyzer_dump(int); + +// Ensure that VLA types are correctly handled by unary type traits in the +// expression engine. Previously, __datasizeof and _Countof both caused failed +// assertions. +void gh151711(int i) { + clang_analyzer_dump(sizeof(int[i++])); // expected-warning {{Unknown}} +#ifdef __cplusplus + // __datasizeof is only available in C++. + clang_analyzer_dump(__datasizeof(int[i++])); // expected-warning {{Unknown}} +#else + // _Countof is only available in C. 
+ clang_analyzer_dump(_Countof(int[i++])); // expected-warning {{Unknown}} +#endif +} diff --git a/clang/test/CIR/CodeGen/cleanup.cpp b/clang/test/CIR/CodeGen/cleanup.cpp index 41961513..0400d4b 100644 --- a/clang/test/CIR/CodeGen/cleanup.cpp +++ b/clang/test/CIR/CodeGen/cleanup.cpp @@ -81,3 +81,17 @@ void test_cleanup_nested() { // CHECK: } // CHECK: cir.call @_ZN5StrukD1Ev(%[[OUTER]]) nothrow : (!cir.ptr<!rec_Struk>) -> () // CHECK: cir.return + +void use_ref(const Struk &); + +void test_expr_with_cleanup() { + use_ref(Struk{}); +} + +// CHECK: cir.func{{.*}} @_Z22test_expr_with_cleanupv() +// CHECK: cir.scope { +// CHECK: %[[S:.*]] = cir.alloca !rec_Struk, !cir.ptr<!rec_Struk> +// CHECK: cir.call @_Z7use_refRK5Struk(%[[S]]) +// CHECK: cir.call @_ZN5StrukD1Ev(%[[S]]) nothrow : (!cir.ptr<!rec_Struk>) -> () +// CHECK: } +// CHECK: cir.return diff --git a/clang/test/CIR/CodeGen/virtual-function-calls.cpp b/clang/test/CIR/CodeGen/virtual-function-calls.cpp new file mode 100644 index 0000000..3e03b32 --- /dev/null +++ b/clang/test/CIR/CodeGen/virtual-function-calls.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -mconstructor-aliases -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +struct A { + virtual void f(char); +}; + +// This is just here to force the class definition to be emitted without +// requiring any other support. It will be removed when more complete +// vtable support is implemented. 
+A *a; + +// CIR: !rec_A = !cir.record<struct "A" {!cir.vptr}> diff --git a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp new file mode 100644 index 0000000..3306c55 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp @@ -0,0 +1,522 @@ +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s + +struct NoCopyConstruct {}; + +struct CopyConstruct { + CopyConstruct() = default; + CopyConstruct(const CopyConstruct&); +}; + +struct NonDefaultCtor { + NonDefaultCtor(); +}; + +struct HasDtor { + ~HasDtor(); +}; + +// CHECK: acc.private.recipe @privatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i +// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"] +// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const 
#cir.int<-1> : !s64i +// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: 
!cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS7HasDtor : !cir.ptr<!rec_HasDtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS14NonDefaultCtor : !cir.ptr<!rec_NonDefaultCtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NonDefaultCtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS13CopyConstruct : !cir.ptr<!rec_CopyConstruct> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_CopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS15NoCopyConstruct : !cir.ptr<!rec_NoCopyConstruct> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NoCopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, 
["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSf : !cir.ptr<!cir.float> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSi : !cir.ptr<!s32i> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!s32i> {{.*}}): +// CHECK-NEXT: cir.alloca !s32i, !cir.ptr<!s32i>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } + +extern "C" void acc_combined() { + // CHECK: cir.func{{.*}} @acc_combined() { + + int someInt; + // CHECK-NEXT: %[[SOMEINT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["someInt"] + float someFloat; + // CHECK-NEXT: %[[SOMEFLOAT:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["someFloat"] + NoCopyConstruct noCopy; + // CHECK-NEXT: %[[NOCOPY:.*]] = cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, ["noCopy"] + CopyConstruct hasCopy; + // CHECK-NEXT: %[[HASCOPY:.*]] = cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["hasCopy"] + NonDefaultCtor notDefCtor; + // CHECK-NEXT: %[[NOTDEFCTOR:.*]] = cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["notDefCtor", init] + HasDtor dtor; + // CHECK-NEXT: %[[DTOR:.*]] = cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["dtor"] + int someIntArr[5]; + // CHECK-NEXT: %[[INTARR:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["someIntArr"] + float someFloatArr[5]; + // CHECK-NEXT: %[[FLOATARR:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["someFloatArr"] + NoCopyConstruct noCopyArr[5]; + // CHECK-NEXT: %[[NOCOPYARR:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["noCopyArr"] + CopyConstruct hasCopyArr[5]; + // CHECK-NEXT: %[[HASCOPYARR:.*]] = cir.alloca 
!cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["hasCopyArr"] + NonDefaultCtor notDefCtorArr[5]; + // CHECK-NEXT: %[[NOTDEFCTORARR:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["notDefCtorArr", init] + HasDtor dtorArr[5]; + // CHECK-NEXT: %[[DTORARR:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["dtorArr"] + // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[NOTDEFCTOR]]) : (!cir.ptr<!rec_NonDefaultCtor>) -> () + +#pragma acc parallel loop private(someInt) + for(int i = 0; i < 5; ++i); + // CHECK: acc.parallel combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSi -> %[[PRIVATE]] : !cir.ptr<!s32i>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(someFloat) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTSf -> %[[PRIVATE]] : !cir.ptr<!cir.float>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop private(noCopy) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> {name = "noCopy"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTS15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_NoCopyConstruct> + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop 
private(hasCopy) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTS13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_CopyConstruct>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(notDefCtor) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTS14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!rec_NonDefaultCtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(dtor) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTS7HasDtor -> %[[PRIVATE]] : !cir.ptr<!rec_HasDtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop private(someInt, someFloat, noCopy, hasCopy, notDefCtor, dtor) + for(int i = 0; i < 5; ++i); + // CHECK: acc.parallel combined(loop) { + // CHECK: %[[PRIVATE1:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> 
{name = "noCopy"} + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSi -> %[[PRIVATE1]] : !cir.ptr<!s32i>, + // CHECK-SAME: @privatization__ZTSf -> %[[PRIVATE2]] : !cir.ptr<!cir.float>, + // CHECK-SAME: @privatization__ZTS15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!rec_NoCopyConstruct>, + // CHECK-SAME: @privatization__ZTS13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!rec_CopyConstruct>, + // CHECK-SAME: @privatization__ZTS14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!rec_NonDefaultCtor>, + // CHECK-SAME: @privatization__ZTS7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!rec_HasDtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial loop private(someIntArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: acc.loop combined(serial) 
private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(someFloatArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(noCopyArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> 
!cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(hasCopyArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(notDefCtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(dtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = 
arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: 
%[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTSA5_i -> 
%[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel loop private(someIntArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(someFloatArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(noCopyArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: acc.loop combined(parallel) 
private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial loop private(hasCopyArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.serial combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: acc.loop combined(serial) private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(notDefCtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds 
lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(dtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel loop private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: acc.parallel combined(loop) { + // CHECK-NEXT: %[[ONE:.*]] = 
cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = 
acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.loop combined(parallel) private(@privatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +} diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp new file mode 100644 index 0000000..a204f41 --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp @@ -0,0 +1,459 @@ +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck 
%s + +struct NoCopyConstruct {}; + +struct CopyConstruct { + CopyConstruct() = default; + CopyConstruct(const CopyConstruct&); +}; + +struct NonDefaultCtor { + NonDefaultCtor(); +}; + +struct HasDtor { + ~HasDtor(); +}; + +// CHECK: acc.private.recipe @privatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i +// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"] +// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i +// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], 
%[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"] +// 
CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS7HasDtor : !cir.ptr<!rec_HasDtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS14NonDefaultCtor : !cir.ptr<!rec_NonDefaultCtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NonDefaultCtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS13CopyConstruct : !cir.ptr<!rec_CopyConstruct> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_CopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS15NoCopyConstruct : !cir.ptr<!rec_NoCopyConstruct> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NoCopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSf : !cir.ptr<!cir.float> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSi : !cir.ptr<!s32i> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!s32i> {{.*}}): +// 
CHECK-NEXT: cir.alloca !s32i, !cir.ptr<!s32i>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } + +extern "C" void acc_compute() { + // CHECK: cir.func{{.*}} @acc_compute() { + + int someInt; + // CHECK-NEXT: %[[SOMEINT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["someInt"] + float someFloat; + // CHECK-NEXT: %[[SOMEFLOAT:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["someFloat"] + NoCopyConstruct noCopy; + // CHECK-NEXT: %[[NOCOPY:.*]] = cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, ["noCopy"] + CopyConstruct hasCopy; + // CHECK-NEXT: %[[HASCOPY:.*]] = cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["hasCopy"] + NonDefaultCtor notDefCtor; + // CHECK-NEXT: %[[NOTDEFCTOR:.*]] = cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["notDefCtor", init] + HasDtor dtor; + // CHECK-NEXT: %[[DTOR:.*]] = cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["dtor"] + int someIntArr[5]; + // CHECK-NEXT: %[[INTARR:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["someIntArr"] + float someFloatArr[5]; + // CHECK-NEXT: %[[FLOATARR:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["someFloatArr"] + NoCopyConstruct noCopyArr[5]; + // CHECK-NEXT: %[[NOCOPYARR:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["noCopyArr"] + CopyConstruct hasCopyArr[5]; + // CHECK-NEXT: %[[HASCOPYARR:.*]] = cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["hasCopyArr"] + NonDefaultCtor notDefCtorArr[5]; + // CHECK-NEXT: %[[NOTDEFCTORARR:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["notDefCtorArr", init] + HasDtor dtorArr[5]; + // CHECK-NEXT: %[[DTORARR:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["dtorArr"] + // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[NOTDEFCTOR]]) : 
(!cir.ptr<!rec_NonDefaultCtor>) -> () + +#pragma acc parallel private(someInt) + ; + // CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSi -> %[[PRIVATE]] : !cir.ptr<!s32i>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(someFloat) + ; + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSf -> %[[PRIVATE]] : !cir.ptr<!cir.float>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel private(noCopy) + ; + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> {name = "noCopy"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTS15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_NoCopyConstruct> + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(hasCopy) + ; + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: acc.serial private(@privatization__ZTS13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_CopyConstruct>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(notDefCtor) + ; + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: acc.serial private(@privatization__ZTS14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!rec_NonDefaultCtor>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(dtor) + ; + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.serial 
private(@privatization__ZTS7HasDtor -> %[[PRIVATE]] : !cir.ptr<!rec_HasDtor>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel private(someInt, someFloat, noCopy, hasCopy, notDefCtor, dtor) + ; + // CHECK: %[[PRIVATE1:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> {name = "noCopy"} + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSi -> %[[PRIVATE1]] : !cir.ptr<!s32i>, + // CHECK-SAME: @privatization__ZTSf -> %[[PRIVATE2]] : !cir.ptr<!cir.float>, + // CHECK-SAME: @privatization__ZTS15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!rec_NoCopyConstruct>, + // CHECK-SAME: @privatization__ZTS13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!rec_CopyConstruct>, + // CHECK-SAME: @privatization__ZTS14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!rec_NonDefaultCtor>, + // CHECK-SAME: @privatization__ZTS7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!rec_HasDtor>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc serial private(someIntArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = 
arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(someFloatArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(noCopyArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(hasCopyArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(notDefCtorArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : 
!cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(dtorArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> 
!cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private 
varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : 
!cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc + +#pragma acc parallel private(someIntArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(someFloatArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = 
acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(noCopyArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc serial private(hasCopyArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: acc.serial private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(notDefCtorArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(dtorArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] 
= acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +#pragma acc parallel private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) + ; + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i 
to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.parallel private(@privatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : 
!cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK-NEXT: acc.yield + // CHECK-NEXT: } loc +} diff --git a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp new file mode 100644 index 0000000..384496b --- /dev/null +++ b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp @@ -0,0 +1,459 @@ +// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s + +struct NoCopyConstruct {}; + +struct CopyConstruct { + CopyConstruct() = default; + CopyConstruct(const CopyConstruct&); +}; + +struct NonDefaultCtor { + NonDefaultCtor(); +}; + +struct HasDtor { + ~HasDtor(); +}; + +// CHECK: acc.private.recipe @privatization__ZTSA5_7HasDtor : !cir.ptr<!cir.array<!rec_HasDtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_HasDtor x 5>> {{.*}}): +// CHECK-NEXT: %[[LAST_IDX:.*]] = cir.const #cir.int<4> : !u64i +// CHECK-NEXT: %[[ARRPTR:.*]] = cir.cast(array_to_ptrdecay, %[[ARG]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ELEM:.*]] = cir.ptr_stride(%[[ARRPTR]] : !cir.ptr<!rec_HasDtor>, %[[LAST_IDX]] : !u64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>>, ["__array_idx"] +// CHECK-NEXT: cir.store %[[ELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.do { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// 
CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ELEM_LOAD]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: %[[NEG_ONE:.*]] = cir.const #cir.int<-1> : !s64i +// CHECK-NEXT: %[[PREVELEM:.*]] = cir.ptr_stride(%[[ELEM_LOAD]] : !cir.ptr<!rec_HasDtor>, %[[NEG_ONE]] : !s64i), !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: cir.store %[[PREVELEM]], %[[ITR]] : !cir.ptr<!rec_HasDtor>, !cir.ptr<!cir.ptr<!rec_HasDtor>> +// CHECK-NEXT: cir.yield +// CHECK-NEXT: } while { +// CHECK-NEXT: %[[ELEM_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!cir.ptr<!rec_HasDtor>>, !cir.ptr<!rec_HasDtor> +// CHECK-NEXT: %[[CMP:.*]] = cir.cmp(ne, %[[ELEM_LOAD]], %[[ARRPTR]]) : !cir.ptr<!rec_HasDtor>, !cir.bool +// CHECK-NEXT: cir.condition(%[[CMP]]) +// CHECK-NEXT: } +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_14NonDefaultCtor : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_13CopyConstruct : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_15NoCopyConstruct : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// 
CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_f : !cir.ptr<!cir.array<!cir.float x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!cir.float x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSA5_i : !cir.ptr<!cir.array<!s32i x 5>> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!s32i x 5>> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS7HasDtor : !cir.ptr<!rec_HasDtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } destroy { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_HasDtor> {{.*}}): +// CHECK-NEXT: cir.call @_ZN7HasDtorD1Ev(%[[ARG]]) nothrow : (!cir.ptr<!rec_HasDtor>) -> () +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS14NonDefaultCtor : !cir.ptr<!rec_NonDefaultCtor> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NonDefaultCtor> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS13CopyConstruct : !cir.ptr<!rec_CopyConstruct> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!rec_CopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTS15NoCopyConstruct : !cir.ptr<!rec_NoCopyConstruct> init { +// CHECK-NEXT: 
^bb0(%[[ARG:.*]]: !cir.ptr<!rec_NoCopyConstruct> {{.*}}): +// CHECK-NEXT: cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSf : !cir.ptr<!cir.float> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.float> {{.*}}): +// CHECK-NEXT: cir.alloca !cir.float, !cir.ptr<!cir.float>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } +// +// CHECK-NEXT: acc.private.recipe @privatization__ZTSi : !cir.ptr<!s32i> init { +// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!s32i> {{.*}}): +// CHECK-NEXT: cir.alloca !s32i, !cir.ptr<!s32i>, ["openacc.private.init"] +// CHECK-NEXT: acc.yield +// CHECK-NEXT: } + +extern "C" void acc_loop() { + // CHECK: cir.func{{.*}} @acc_loop() { + + int someInt; + // CHECK-NEXT: %[[SOMEINT:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["someInt"] + float someFloat; + // CHECK-NEXT: %[[SOMEFLOAT:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["someFloat"] + NoCopyConstruct noCopy; + // CHECK-NEXT: %[[NOCOPY:.*]] = cir.alloca !rec_NoCopyConstruct, !cir.ptr<!rec_NoCopyConstruct>, ["noCopy"] + CopyConstruct hasCopy; + // CHECK-NEXT: %[[HASCOPY:.*]] = cir.alloca !rec_CopyConstruct, !cir.ptr<!rec_CopyConstruct>, ["hasCopy"] + NonDefaultCtor notDefCtor; + // CHECK-NEXT: %[[NOTDEFCTOR:.*]] = cir.alloca !rec_NonDefaultCtor, !cir.ptr<!rec_NonDefaultCtor>, ["notDefCtor", init] + HasDtor dtor; + // CHECK-NEXT: %[[DTOR:.*]] = cir.alloca !rec_HasDtor, !cir.ptr<!rec_HasDtor>, ["dtor"] + int someIntArr[5]; + // CHECK-NEXT: %[[INTARR:.*]] = cir.alloca !cir.array<!s32i x 5>, !cir.ptr<!cir.array<!s32i x 5>>, ["someIntArr"] + float someFloatArr[5]; + // CHECK-NEXT: %[[FLOATARR:.*]] = cir.alloca !cir.array<!cir.float x 5>, !cir.ptr<!cir.array<!cir.float x 5>>, ["someFloatArr"] + NoCopyConstruct noCopyArr[5]; + // CHECK-NEXT: %[[NOCOPYARR:.*]] = cir.alloca !cir.array<!rec_NoCopyConstruct x 5>, 
!cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, ["noCopyArr"] + CopyConstruct hasCopyArr[5]; + // CHECK-NEXT: %[[HASCOPYARR:.*]] = cir.alloca !cir.array<!rec_CopyConstruct x 5>, !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, ["hasCopyArr"] + NonDefaultCtor notDefCtorArr[5]; + // CHECK-NEXT: %[[NOTDEFCTORARR:.*]] = cir.alloca !cir.array<!rec_NonDefaultCtor x 5>, !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, ["notDefCtorArr", init] + HasDtor dtorArr[5]; + // CHECK-NEXT: %[[DTORARR:.*]] = cir.alloca !cir.array<!rec_HasDtor x 5>, !cir.ptr<!cir.array<!rec_HasDtor x 5>>, ["dtorArr"] + // CHECK-NEXT: cir.call @_ZN14NonDefaultCtorC1Ev(%[[NOTDEFCTOR]]) : (!cir.ptr<!rec_NonDefaultCtor>) -> () + +#pragma acc loop private(someInt) + for(int i = 0; i < 5; ++i); + // CHECK: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSi -> %[[PRIVATE]] : !cir.ptr<!s32i>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(someFloat) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSf -> %[[PRIVATE]] : !cir.ptr<!cir.float>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop private(noCopy) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> {name = "noCopy"} + // CHECK-NEXT: acc.loop private(@privatization__ZTS15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_NoCopyConstruct> + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(hasCopy) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: acc.loop 
private(@privatization__ZTS13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!rec_CopyConstruct>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(notDefCtor) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: acc.loop private(@privatization__ZTS14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!rec_NonDefaultCtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(dtor) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.loop private(@privatization__ZTS7HasDtor -> %[[PRIVATE]] : !cir.ptr<!rec_HasDtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop private(someInt, someFloat, noCopy, hasCopy, notDefCtor, dtor) + for(int i = 0; i < 5; ++i); + // CHECK: %[[PRIVATE1:.*]] = acc.private varPtr(%[[SOMEINT]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "someInt"} + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[SOMEFLOAT]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "someFloat"} + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPY]] : !cir.ptr<!rec_NoCopyConstruct>) -> !cir.ptr<!rec_NoCopyConstruct> {name = "noCopy"} + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPY]] : !cir.ptr<!rec_CopyConstruct>) -> !cir.ptr<!rec_CopyConstruct> {name = "hasCopy"} + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTOR]] : !cir.ptr<!rec_NonDefaultCtor>) -> !cir.ptr<!rec_NonDefaultCtor> {name = "notDefCtor"} + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTOR]] : !cir.ptr<!rec_HasDtor>) -> !cir.ptr<!rec_HasDtor> {name = "dtor"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSi -> %[[PRIVATE1]] : !cir.ptr<!s32i>, + // CHECK-SAME: @privatization__ZTSf -> %[[PRIVATE2]] : 
!cir.ptr<!cir.float>, + // CHECK-SAME: @privatization__ZTS15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!rec_NoCopyConstruct>, + // CHECK-SAME: @privatization__ZTS13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!rec_CopyConstruct>, + // CHECK-SAME: @privatization__ZTS14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!rec_NonDefaultCtor>, + // CHECK-SAME: @privatization__ZTS7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!rec_HasDtor>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop private(someIntArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(someFloatArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[FLOATARR]] : 
!cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(noCopyArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(hasCopyArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = 
"hasCopyArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(notDefCtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(dtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : 
!cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) 
extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: 
%[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc + +#pragma acc loop private(someIntArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: 
acc.loop private(@privatization__ZTSA5_i -> %[[PRIVATE]] : !cir.ptr<!cir.array<!s32i x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(someFloatArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_f -> %[[PRIVATE]] : !cir.ptr<!cir.array<!cir.float x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(noCopyArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> 
!cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(hasCopyArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(notDefCtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // 
CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(dtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_7HasDtor -> %[[PRIVATE]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +#pragma acc loop private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) + for(int i = 0; i < 5; ++i); + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = 
arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr<!cir.array<!s32i x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!s32i x 5>> {name = "someIntArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr<!cir.array<!cir.float x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!cir.float x 5>> {name = "someFloatArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>> {name = "noCopyArr[1:1]"} + // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_CopyConstruct x 5>> {name = "hasCopyArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>> {name = "notDefCtorArr[1:1]"} + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 + // CHECK-NEXT: 
%[[ONE_CONST2:.*]] = arith.constant 1 + // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) + // CHECK-NEXT: %[[PRIVATE6:.*]] = acc.private varPtr(%[[DTORARR]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) bounds(%[[BOUNDS]]) -> !cir.ptr<!cir.array<!rec_HasDtor x 5>> {name = "dtorArr[1:1]"} + // CHECK-NEXT: acc.loop private(@privatization__ZTSA5_i -> %[[PRIVATE1]] : !cir.ptr<!cir.array<!s32i x 5>>, + // CHECK-SAME: @privatization__ZTSA5_f -> %[[PRIVATE2]] : !cir.ptr<!cir.array<!cir.float x 5>>, + // CHECK-SAME: @privatization__ZTSA5_15NoCopyConstruct -> %[[PRIVATE3]] : !cir.ptr<!cir.array<!rec_NoCopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_13CopyConstruct -> %[[PRIVATE4]] : !cir.ptr<!cir.array<!rec_CopyConstruct x 5>>, + // CHECK-SAME: @privatization__ZTSA5_14NonDefaultCtor -> %[[PRIVATE5]] : !cir.ptr<!cir.array<!rec_NonDefaultCtor x 5>>, + // CHECK-SAME: @privatization__ZTSA5_7HasDtor -> %[[PRIVATE6]] : !cir.ptr<!cir.array<!rec_HasDtor x 5>>) + // CHECK: acc.yield + // CHECK-NEXT: } loc +} diff --git a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp index 1dfb2ed..0bf932e 100644 --- a/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp +++ b/clang/test/CIR/CodeGenOpenACC/openacc-not-implemented.cpp @@ -10,8 +10,8 @@ void HelloWorld(int *A, int *B, int *C, int N) { // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Declare Construct}} #pragma acc declare create(A) - // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: private}} -#pragma acc parallel loop private(A) + // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: firstprivate}} +#pragma acc parallel loop firstprivate(A) for(int i = 0; i <5; ++i); // expected-error@+1{{ClangIR code gen Not Yet Implemented: OpenACC Clause: reduction}} #pragma acc 
parallel loop reduction(+:A) diff --git a/clang/test/ClangScanDeps/modules-debug-dir.c b/clang/test/ClangScanDeps/modules-debug-dir.c index c4fb498..e2a0e1b 100644 --- a/clang/test/ClangScanDeps/modules-debug-dir.c +++ b/clang/test/ClangScanDeps/modules-debug-dir.c @@ -1,5 +1,3 @@ -// REQUIRES: shell - // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json @@ -7,6 +5,12 @@ // RUN: experimental-full -optimize-args=all > %t/result.json // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck %s +// RUN: %deps-to-rsp %t/result.json --module-name=mod > %t/mod.rsp +// RUN: %clang @%t/mod.rsp -o %t/mod.pcm +// RUN: llvm-dwarfdump --debug-info %t/mod.pcm | FileCheck %s --check-prefix=DWARF +// DWARF: DW_TAG_compile_unit +// DWARF-NOT: DW_AT_comp_dir + //--- cdb.json.in [{ "directory": "DIR", @@ -28,5 +32,5 @@ module mod { // directory when current working directory optimization is in effect. // CHECK: "modules": [ // CHECK: "command-line": [ -// CHECK: "-fdebug-compilation-dir={{\/|.*:(\\)?}}", +// CHECK-NOT: -fdebug-compilation-dir // CHECK: "translation-units": [ diff --git a/clang/test/CodeGen/atomic-arm.c b/clang/test/CodeGen/atomic-arm.c index 6952b4d..e6c2b8d 100644 --- a/clang/test/CodeGen/atomic-arm.c +++ b/clang/test/CodeGen/atomic-arm.c @@ -2,7 +2,10 @@ // RUN: %clang_cc1 -triple thumbv7m-apple-unknown-macho %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-V7M // RUN: %clang_cc1 -triple thumbv7-apple-ios13.0 %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HOSTED // RUN: %clang_cc1 -triple thumbv7k-apple-watchos5.0 %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HOSTED - +// RUN: %clang_cc1 -triple arm-linux-gnueabi %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HOSTED +// RUN: %clang_cc1 -triple armv7-none-eabi %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HOSTED +// RUN: %clang_cc1 -triple thumbv6k-none-eabi %s -emit-llvm -o - | 
FileCheck %s --check-prefixes=CHECK,CHECK-HOSTED +// RUN: %clang_cc1 -triple armv5-none-eabi %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-V6M // CHECK-V6M: @always1 = global i32 0 // CHECK-V6M: @always4 = global i32 0 @@ -22,7 +25,7 @@ int always8 = __atomic_always_lock_free(8, 0); int lock_free_1() { // CHECK-LABEL: @lock_free_1 - // CHECK-V6M: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_is_lock_free(i32 noundef 1, ptr noundef null) + // CHECK-V6M: [[RES:%.*]] = call{{.*}}zeroext i1 @__atomic_is_lock_free(i32 noundef 1, ptr noundef null) // CHECK-V6M: [[RES32:%.*]] = zext i1 [[RES]] to i32 // CHECK-V6M: ret i32 [[RES32]] @@ -33,7 +36,7 @@ int lock_free_1() { int lock_free_4() { // CHECK-LABEL: @lock_free_4 - // CHECK-V6M: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_is_lock_free(i32 noundef 4, ptr noundef null) + // CHECK-V6M: [[RES:%.*]] = call{{.*}}zeroext i1 @__atomic_is_lock_free(i32 noundef 4, ptr noundef null) // CHECK-V6M: [[RES32:%.*]] = zext i1 [[RES]] to i32 // CHECK-V6M: ret i32 [[RES32]] @@ -44,11 +47,11 @@ int lock_free_4() { int lock_free_8() { // CHECK-LABEL: @lock_free_8 - // CHECK-V6M: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_is_lock_free(i32 noundef 8, ptr noundef null) + // CHECK-V6M: [[RES:%.*]] = call{{.*}}zeroext i1 @__atomic_is_lock_free(i32 noundef 8, ptr noundef null) // CHECK-V6M: [[RES32:%.*]] = zext i1 [[RES]] to i32 // CHECK-V6M: ret i32 [[RES32]] - // CHECK-V7M: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_is_lock_free(i32 noundef 8, ptr noundef null) + // CHECK-V7M: [[RES:%.*]] = call{{.*}}zeroext i1 @__atomic_is_lock_free(i32 noundef 8, ptr noundef null) // CHECK-V7M: [[RES32:%.*]] = zext i1 [[RES]] to i32 // CHECK-V7M: ret i32 [[RES32]] diff --git a/clang/test/CodeGen/attr-counted-by.c b/clang/test/CodeGen/attr-counted-by.c index 101949a..9fb50c6 100644 --- a/clang/test/CodeGen/attr-counted-by.c +++ b/clang/test/CodeGen/attr-counted-by.c @@ -2445,3 +2445,39 @@ struct { size_t 
test36() { return __builtin_dynamic_object_size(&x.dev_addr[4], 1); } + +// SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37( +// SANITIZE-WITH-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITH-ATTR-NEXT: entry: +// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8 +// SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 +// SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 +// SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 2 +// SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 +// SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0 +// SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP1]] +// +// NO-SANITIZE-WITH-ATTR-LABEL: define dso_local range(i64 -8589934592, 8589934589) i64 @test37( +// NO-SANITIZE-WITH-ATTR-SAME: ptr noundef readonly captures(none) [[PTR:%.*]]) local_unnamed_addr #[[ATTR2]] { +// NO-SANITIZE-WITH-ATTR-NEXT: entry: +// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_GEP:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i64 8 +// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNTED_BY_LOAD:%.*]] = load i32, ptr [[COUNTED_BY_GEP]], align 4 +// NO-SANITIZE-WITH-ATTR-NEXT: [[COUNT:%.*]] = sext i32 [[COUNTED_BY_LOAD]] to i64 +// NO-SANITIZE-WITH-ATTR-NEXT: [[FLEXIBLE_ARRAY_MEMBER_SIZE:%.*]] = shl nsw i64 [[COUNT]], 2 +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[COUNTED_BY_LOAD]], -1 +// NO-SANITIZE-WITH-ATTR-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 [[FLEXIBLE_ARRAY_MEMBER_SIZE]], i64 0 +// NO-SANITIZE-WITH-ATTR-NEXT: ret i64 [[TMP1]] +// +// SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37( +// SANITIZE-WITHOUT-ATTR-SAME: ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +// SANITIZE-WITHOUT-ATTR-NEXT: entry: +// 
SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 +// +// NO-SANITIZE-WITHOUT-ATTR-LABEL: define dso_local i64 @test37( +// NO-SANITIZE-WITHOUT-ATTR-SAME: ptr noundef readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR1]] { +// NO-SANITIZE-WITHOUT-ATTR-NEXT: entry: +// NO-SANITIZE-WITHOUT-ATTR-NEXT: ret i64 -1 +// +size_t test37(struct annotated *ptr) { + return __builtin_dynamic_object_size((1, 2, (4, 5, (7, 8, 9, (10, ptr->array)))), 1); +} diff --git a/clang/test/CodeGen/dbg-info-all-calls-described.c b/clang/test/CodeGen/dbg-info-all-calls-described.c new file mode 100644 index 0000000..3ca3aaa --- /dev/null +++ b/clang/test/CodeGen/dbg-info-all-calls-described.c @@ -0,0 +1,88 @@ +// Test that call site debug info is (un)supported in various configurations. + +// Supported: DWARF5, -O1, standalone DI +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O1 -disable-llvm-passes \ +// RUN: -debug-info-kind=standalone -dwarf-version=5 \ +// RUN: | FileCheck %s -check-prefix=HAS-ATTR \ +// RUN: -implicit-check-not=DISubprogram -implicit-check-not=DIFlagAllCallsDescribed + +// Supported: DWARF4 + LLDB tuning, -O1, limited DI +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O1 -disable-llvm-passes \ +// RUN: -debugger-tuning=lldb \ +// RUN: -debug-info-kind=standalone -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=HAS-ATTR \ +// RUN: -implicit-check-not=DISubprogram -implicit-check-not=DIFlagAllCallsDescribed + +// Note: DIFlagAllCallsDescribed may have been enabled prematurely when tuning +// for GDB under -gdwarf-4 in https://reviews.llvm.org/D69743. It's possible +// this should have been 'Unsupported' until entry values emission was enabled +// by default. 
+// +// Supported: DWARF4 + GDB tuning +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu \ +// RUN: %s -o - -O1 -disable-llvm-passes -debugger-tuning=gdb \ +// RUN: -debug-info-kind=standalone -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=HAS-ATTR \ +// RUN: -implicit-check-not=DIFlagAllCallsDescribed + +// Supported: DWARF4 + LLDB, -O1 +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu \ +// RUN: %s -o - -O1 -disable-llvm-passes -debugger-tuning=lldb \ +// RUN: -debug-info-kind=standalone -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=HAS-ATTR \ +// RUN: -implicit-check-not=DIFlagAllCallsDescribed + +// Unsupported: -O0 +// RUN: %clang_cc1 -emit-llvm -triple x86_64-linux-gnu \ +// RUN: %s -o - -O0 -disable-llvm-passes -debugger-tuning=gdb \ +// RUN: -debug-info-kind=standalone -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=NO-ATTR + +// Supported: DWARF4 + LLDB tuning, -O1, line-tables only DI +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O1 -disable-llvm-passes \ +// RUN: -debugger-tuning=lldb \ +// RUN: -debug-info-kind=line-tables-only -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=LINE-TABLES-ONLY + +// Unsupported: -O0 +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O0 \ +// RUN: -debug-info-kind=standalone -dwarf-version=5 \ +// RUN: | FileCheck %s -check-prefix=NO-ATTR + +// Unsupported: DWARF4 +// RUN: %clang_cc1 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -O1 -disable-llvm-passes \ +// RUN: -debug-info-kind=standalone -dwarf-version=4 \ +// RUN: | FileCheck %s -check-prefix=NO-ATTR + +// NO-ATTR-NOT: FlagAllCallsDescribed + +// HAS-ATTR-DAG: DISubprogram(name: "declaration1", {{.*}}, spFlags: DISPFlagOptimized) +// HAS-ATTR-DAG: DISubprogram(name: "declaration2", {{.*}}, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized +// HAS-ATTR-DAG: DISubprogram(name: "declaration3", {{.*}}, flags: 
DIFlagPrototyped, spFlags: DISPFlagOptimized) +// HAS-ATTR-DAG: DISubprogram(name: "declaration4", {{.*}}, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized + +// HAS-ATTR-DAG: DISubprogram(name: "force_irgen", {{.*}}, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition + +// LINE-TABLES-ONLY: DISubprogram(name: "force_irgen", {{.*}}, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition + +void declaration1(); + +void declaration2(); + +void declaration2() {} + +void declaration3(void); + +void declaration4(void); + +void declaration4(void) {} + +void __attribute__((optnone)) force_irgen(void) { + declaration1(); + declaration3(); +} diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp index 84e7a98..c95f7fb 100644 --- a/clang/test/CodeGen/pr45476.cpp +++ b/clang/test/CodeGen/pr45476.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple arm-unknown-linux-gnueabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s +// RUN: %clang_cc1 -triple armv6m-eabi -emit-llvm %s -o - | FileCheck -check-prefix=LIBCALL %s // RUN: %clang_cc1 -triple armv8-eabi -emit-llvm %s -o - | FileCheck -check-prefix=NATIVE %s // PR45476 diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index efd70a9..df71ead 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -108,7 +108,7 @@ // GFX1153: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1200: 
"target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" // GFX1201: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+fp8-conversion-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize32" -// GFX1250: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32 +// GFX1250: 
"target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-trans-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot7-insts,+dot8-insts,+dpp,+fp8-conversion-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+permlane16-swap,+prng-inst,+setprio-inc-wg-inst,+tanh-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+wavefrontsize32" // GFX1103-W64: "target-features"="+16-bit-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot10-insts,+dot12-insts,+dot5-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx8-insts,+gfx9-insts,+wavefrontsize64" diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 51ab970..150c6ce 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -674,6 +674,100 @@ void test_cvt_scale_pk(global half8 *outh8, global bfloat8 *outy8, uint2 src2, *outf8 = __builtin_amdgcn_cvt_scale_pk8_f32_fp4(src1, scale, 7); } +// CHECK-LABEL: @test_cvt_scalef32_pk( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT2_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRCBF8_ADDR:%.*]] = alloca <8 x bfloat>, align 16, addrspace(5) +// CHECK-NEXT: [[SRCH8_ADDR:%.*]] = alloca <8 x half>, align 16, addrspace(5) +// CHECK-NEXT: [[SRCF8_ADDR:%.*]] = alloca <8 x float>, align 32, addrspace(5) +// CHECK-NEXT: [[OUT3_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRCBF16_ADDR:%.*]] = alloca <16 x bfloat>, align 32, addrspace(5) +// CHECK-NEXT: [[SRCH16_ADDR:%.*]] = alloca <16 x half>, align 32, addrspace(5) +// CHECK-NEXT: [[SRCF16_ADDR:%.*]] = alloca <16 x float>, align 64, addrspace(5) +// CHECK-NEXT: 
[[OUT1_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SCALE_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[OUT2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT2_ADDR]] to ptr +// CHECK-NEXT: [[SRCBF8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCBF8_ADDR]] to ptr +// CHECK-NEXT: [[SRCH8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCH8_ADDR]] to ptr +// CHECK-NEXT: [[SRCF8_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCF8_ADDR]] to ptr +// CHECK-NEXT: [[OUT3_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT3_ADDR]] to ptr +// CHECK-NEXT: [[SRCBF16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCBF16_ADDR]] to ptr +// CHECK-NEXT: [[SRCH16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCH16_ADDR]] to ptr +// CHECK-NEXT: [[SRCF16_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRCF16_ADDR]] to ptr +// CHECK-NEXT: [[OUT1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT1_ADDR]] to ptr +// CHECK-NEXT: [[SCALE_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SCALE_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT2:%.*]], ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <8 x bfloat> [[SRCBF8:%.*]], ptr [[SRCBF8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store <8 x half> [[SRCH8:%.*]], ptr [[SRCH8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store <8 x float> [[SRCF8:%.*]], ptr [[SRCF8_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store ptr addrspace(1) [[OUT3:%.*]], ptr [[OUT3_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <16 x bfloat> [[SRCBF16:%.*]], ptr [[SRCBF16_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <16 x half> [[SRCH16:%.*]], ptr [[SRCH16_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <16 x float> [[SRCF16:%.*]], ptr [[SRCF16_ADDR_ASCAST]], align 64 +// CHECK-NEXT: store ptr addrspace(1) [[OUT1:%.*]], ptr [[OUT1_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store float [[SCALE:%.*]], ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: 
[[TMP0:%.*]] = load <8 x bfloat>, ptr [[SRCBF8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> [[TMP0]], float [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(1) [[TMP3]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x bfloat>, ptr [[SRCBF8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> [[TMP4]], float [[TMP5]]) +// CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(1) [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x half>, ptr [[SRCH8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> [[TMP8]], float [[TMP9]]) +// CHECK-NEXT: [[TMP11:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP10]], ptr addrspace(1) [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = load <8 x half>, ptr [[SRCH8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP14:%.*]] = call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> [[TMP12]], float [[TMP13]]) +// CHECK-NEXT: [[TMP15:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP14]], ptr addrspace(1) [[TMP15]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = load <8 x float>, ptr [[SRCF8_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = 
call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> [[TMP16]], float [[TMP17]]) +// CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP18]], ptr addrspace(1) [[TMP19]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = load <8 x float>, ptr [[SRCF8_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP21:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP22:%.*]] = call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> [[TMP20]], float [[TMP21]]) +// CHECK-NEXT: [[TMP23:%.*]] = load ptr addrspace(1), ptr [[OUT2_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store <2 x i32> [[TMP22]], ptr addrspace(1) [[TMP23]], align 8 +// CHECK-NEXT: [[TMP24:%.*]] = load <8 x float>, ptr [[SRCF8_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> [[TMP24]], float [[TMP25]]) +// CHECK-NEXT: [[TMP27:%.*]] = load ptr addrspace(1), ptr [[OUT1_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP26]], ptr addrspace(1) [[TMP27]], align 4 +// CHECK-NEXT: [[TMP28:%.*]] = load <8 x half>, ptr [[SRCH8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP29:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> [[TMP28]], float [[TMP29]]) +// CHECK-NEXT: [[TMP31:%.*]] = load ptr addrspace(1), ptr [[OUT1_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) [[TMP31]], align 4 +// CHECK-NEXT: [[TMP32:%.*]] = load <8 x bfloat>, ptr [[SRCBF8_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP33:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> [[TMP32]], float [[TMP33]]) +// CHECK-NEXT: [[TMP35:%.*]] = load ptr addrspace(1), ptr [[OUT1_ADDR_ASCAST]], align 8 +// 
CHECK-NEXT: store i32 [[TMP34]], ptr addrspace(1) [[TMP35]], align 4 +// CHECK-NEXT: ret void +// +void test_cvt_scalef32_pk(global uint2 *out2, bfloat8 srcbf8, half8 srch8, float8 srcf8, + global uint3 *out3, bfloat16 srcbf16, half16 srch16, float16 srcf16, + global uint *out1, float scale) +{ + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_fp8_bf16(srcbf8, scale); + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_bf8_bf16(srcbf8, scale); + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_fp8_f16(srch8, scale); + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_bf8_f16(srch8, scale); + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_fp8_f32(srcf8, scale); + *out2 = __builtin_amdgcn_cvt_scalef32_pk8_bf8_f32(srcf8, scale); + *out1 = __builtin_amdgcn_cvt_scalef32_pk8_fp4_f32(srcf8, scale); + *out1 = __builtin_amdgcn_cvt_scalef32_pk8_fp4_f16(srch8, scale); + *out1 = __builtin_amdgcn_cvt_scalef32_pk8_fp4_bf16(srcbf8, scale); +} + // CHECK-LABEL: @test_sat_pk4_i4_i8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -744,6 +838,132 @@ void test_permlane16_swap(global uint2* out, uint old, uint src) { *out = __builtin_amdgcn_permlane16_swap(old, src, false, true); } +// CHECK-LABEL: @test_permlane_bcast( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC0_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC0_ADDR]] to ptr +// CHECK-NEXT: [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr +// CHECK-NEXT: [[SRC2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC2_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr 
[[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane.bcast(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_permlane_bcast(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_permlane_bcast(src0, src1, src2); +} + +// CHECK-LABEL: @test_permlane_down( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC0_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC0_ADDR]] to ptr +// CHECK-NEXT: [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr +// CHECK-NEXT: [[SRC2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC2_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane.down(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_permlane_down(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_permlane_down(src0, src1, src2); +} + +// CHECK-LABEL: @test_permlane_up( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC0_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC0_ADDR]] to ptr +// CHECK-NEXT: [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr +// CHECK-NEXT: [[SRC2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC2_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane.up(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// 
CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_permlane_up(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_permlane_up(src0, src1, src2); +} + +// CHECK-LABEL: @test_permlane_xor( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC2_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC0_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC0_ADDR]] to ptr +// CHECK-NEXT: [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr +// CHECK-NEXT: [[SRC2_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC2_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC2:%.*]], ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[SRC2_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.permlane.xor(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4 +// CHECK-NEXT: ret void +// +void test_permlane_xor(global uint* out, uint src0, uint src1, uint src2) { + *out = __builtin_amdgcn_permlane_xor(src0, 
src1, src2); +} + +// CHECK-LABEL: @test_permlane_idx_gen( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[SRC0_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[SRC1_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[SRC0_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC0_ADDR]] to ptr +// CHECK-NEXT: [[SRC1_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC1_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[SRC0:%.*]], ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[SRC1:%.*]], ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC0_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC1_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.permlane.idx.gen(i32 [[TMP0]], i32 [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], ptr addrspace(1) [[TMP3]], align 4 +// CHECK-NEXT: ret void +// +void test_permlane_idx_gen(global uint* out, uint src0, uint src1) { + *out = __builtin_amdgcn_permlane_idx_gen(src0, src1); +} + // CHECK-LABEL: @test_prefetch( // CHECK-NEXT: entry: // CHECK-NEXT: [[FPTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) diff --git a/clang/test/Driver/crash-report-modules.m b/clang/test/Driver/crash-report-modules.m index e6d0335..7f669dc 100644 --- a/clang/test/Driver/crash-report-modules.m +++ b/clang/test/Driver/crash-report-modules.m @@ -1,15 +1,12 @@ -// FIXME: Instead of %T/crmdir, it would be nice to just use %t, but the -// filename ran into path length limits for the rm command on some Windows -// bots. 
-// RUN: rm -rf %T/crmdir -// RUN: mkdir -p %T/crmdir/i %T/crmdir/m +// RUN: rm -rf %t/crmdir +// RUN: mkdir -p %t/crmdir/i %t/crmdir/m -// RUN: env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%T/crmdir TEMP=%T/crmdir TMP=%T/crmdir \ +// RUN: env FORCE_CLANG_DIAGNOSTICS_CRASH= TMPDIR=%t/crmdir TEMP=%t/crmdir TMP=%t/crmdir \ // RUN: not %clang -fsyntax-only %s -I %S/Inputs/module -isysroot %/t/i/ \ -// RUN: -fmodules -fmodules-cache-path=%T/crmdir/m/ -DFOO=BAR 2>&1 | FileCheck %s +// RUN: -fmodules -fmodules-cache-path=%t/crmdir/m/ -DFOO=BAR 2>&1 | FileCheck %s -// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %T/crmdir/crash-report-*.m -// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %T/crmdir/crash-report-*.sh +// RUN: FileCheck --check-prefix=CHECKSRC %s -input-file %t/crmdir/crash-report-*.m +// RUN: FileCheck --check-prefix=CHECKSH %s -input-file %t/crmdir/crash-report-*.sh // REQUIRES: crash-recovery // FIXME: This test creates excessively deep directory hierarchies that cause diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu index e4f8374..b4a521d 100644 --- a/clang/test/Driver/cuda-arch-translation.cu +++ b/clang/test/Driver/cuda-arch-translation.cu @@ -68,19 +68,19 @@ // HIP: clang-offload-bundler -// SM20:--image=profile=sm_20{{.*}} -// SM21:--image=profile=sm_21{{.*}} -// SM30:--image=profile=sm_30{{.*}} -// SM32:--image=profile=sm_32{{.*}} -// SM35:--image=profile=sm_35{{.*}} -// SM37:--image=profile=sm_37{{.*}} -// SM50:--image=profile=sm_50{{.*}} -// SM52:--image=profile=sm_52{{.*}} -// SM53:--image=profile=sm_53{{.*}} -// SM60:--image=profile=sm_60{{.*}} -// SM61:--image=profile=sm_61{{.*}} -// SM62:--image=profile=sm_62{{.*}} -// SM70:--image=profile=sm_70{{.*}} +// SM20:--image3=kind=elf,sm=20{{.*}} +// SM21:--image3=kind=elf,sm=21{{.*}} +// SM30:--image3=kind=elf,sm=30{{.*}} +// SM32:--image3=kind=elf,sm=32{{.*}} +// SM35:--image3=kind=elf,sm=35{{.*}} +// 
SM37:--image3=kind=elf,sm=37{{.*}} +// SM50:--image3=kind=elf,sm=50{{.*}} +// SM52:--image3=kind=elf,sm=52{{.*}} +// SM53:--image3=kind=elf,sm=53{{.*}} +// SM60:--image3=kind=elf,sm=60{{.*}} +// SM61:--image3=kind=elf,sm=61{{.*}} +// SM62:--image3=kind=elf,sm=62{{.*}} +// SM70:--image3=kind=elf,sm=70{{.*}} // GFX600:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx600 // GFX601:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx601 // GFX602:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx602 diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu index db6536c..fc8e83a 100644 --- a/clang/test/Driver/cuda-options.cu +++ b/clang/test/Driver/cuda-options.cu @@ -243,10 +243,10 @@ // INCLUDES-DEVICE:fatbinary // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]" -// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]" -// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]" -// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]" +// INCLUDES-DEVICE-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE]]" +// INCLUDES-DEVICE-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE]]" +// INCLUDES-DEVICE2-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE2]]" +// INCLUDES-DEVICE2-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE2]]" // Match host-side preprocessor job with -save-temps. 
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu" @@ -288,9 +288,9 @@ // FATBIN-COMMON:fatbinary // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]" -// FATBIN-COMMON: "--image=profile=sm_52,file= -// PTX-SM52: "--image=profile=compute_52,file= -// NOPTX-SM52-NOT: "--image=profile=compute_52,file= -// FATBIN-COMMON: "--image=profile=sm_60,file= -// PTX-SM60: "--image=profile=compute_60,file= -// NOPTX-SM60-NOT: "--image=profile=compute_60,file= +// FATBIN-COMMON: "--image3=kind=elf,sm=52,file= +// PTX-SM52: "--image3=kind=ptx,sm=52,file= +// NOPTX-SM52-NOT: "--image3=kind=ptx,sm=52,file= +// FATBIN-COMMON: "--image3=kind=elf,sm=60,file= +// PTX-SM60: "--image3=kind=ptx,sm=60,file= +// NOPTX-SM60-NOT: "--image3=kind=ptx,sm=60,file= diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip index 9f542c2..f51ab32 100644 --- a/clang/test/Driver/hip-offload-compress-zlib.hip +++ b/clang/test/Driver/hip-offload-compress-zlib.hip @@ -14,7 +14,7 @@ // CHECK: clang-offload-bundler{{.*}} -type=bc // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-unknown-gfx1100,hip-amdgcn-amd-amdhsa-unknown-gfx1101 -// CHECK-SAME: -compress -verbose -compression-level=9 +// CHECK-SAME: --compress --verbose --compression-level=9 // CHECK: Compressed bundle format // Test uncompress of bundled bitcode. 
@@ -41,4 +41,4 @@ // CO: clang-offload-bundler{{.*}} "-type=o" // CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 -// CO-SAME: "-compress" "-verbose" +// CO-SAME: "--compress" "--verbose" diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip index dfe681f..f91c10f 100644 --- a/clang/test/Driver/hip-offload-compress-zstd.hip +++ b/clang/test/Driver/hip-offload-compress-zstd.hip @@ -14,7 +14,7 @@ // CHECK: clang-offload-bundler{{.*}} -type=bc // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-unknown-gfx1100,hip-amdgcn-amd-amdhsa-unknown-gfx1101 -// CHECK-SAME: -compress -verbose -compression-level=9 +// CHECK-SAME: --compress --verbose --compression-level=9 // CHECK: Compressed bundle format // Test uncompress of bundled bitcode. @@ -41,4 +41,16 @@ // CO: clang-offload-bundler{{.*}} "-type=o" // CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101 -// CO-SAME: "-compress" "-verbose" +// CO-SAME: "--compress" "--verbose" + +// RUN: rm -rf %t.bc +// RUN: %clang -### -v --target=x86_64-linux-gnu \ +// RUN: -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \ +// RUN: --offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \ +// RUN: %S/Inputs/hip_multiple_inputs/a.cu \ +// RUN: --offload-compress --offload-compression-level=9 \ +// RUN: --gpu-bundle-output \ +// RUN: -o %t.bc \ +// RUN: 2>&1 | FileCheck %s --check-prefix=NEWDRIVER + +// NEWDRIVER: clang-linker-wrapper{{.*}}"--compress" "--verbose" "--compression-level=9" diff --git a/clang/test/Frontend/dump-minimization-hints.cpp b/clang/test/Frontend/dump-minimization-hints.cpp index 273fd7f..4c5dfbc 100644 --- a/clang/test/Frontend/dump-minimization-hints.cpp +++ b/clang/test/Frontend/dump-minimization-hints.cpp @@ -59,6 +59,36 @@ // RANGE-NEXT: "line": 23, // RANGE-NEXT: "column": 2 // RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: 
"line": 31, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 31, +// RANGE-NEXT: "column": 27 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 32, +// RANGE-NEXT: "column": 3 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 32, +// RANGE-NEXT: "column": 12 +// RANGE-NEXT: } +// RANGE-NEXT: }, +// RANGE-NEXT: { +// RANGE-NEXT: "from": { +// RANGE-NEXT: "line": 34, +// RANGE-NEXT: "column": 1 +// RANGE-NEXT: }, +// RANGE-NEXT: "to": { +// RANGE-NEXT: "line": 34, +// RANGE-NEXT: "column": 2 +// RANGE-NEXT: } // RANGE-NEXT: } // RANGE-NEXT: ] // RANGE-NEXT: } @@ -88,7 +118,7 @@ int multiply(int a, int b) { return a * b; } -inline int unused_by_foo() {} // line 17 +inline void unused_by_foo() {} // line 17 inline void recursively_used_by_foo() {} // line 19 inline int used_by_foo() { // line 20 @@ -98,6 +128,20 @@ inline int used_by_foo() { // line 20 struct UnusedByFoo {}; +namespace ns_unused_by_foo { + void x(); +} + +namespace ns_used_by_foo { // line 31 + void x(); // line 32 + void unused_y(); +} // line 34 + +// Does not have any declarations that are used, so +// will not be marked as used. 
+namespace ns_used_by_foo { + void unused_z(); +} //--- foo.cpp #include "foo.h" int global_value = 5; @@ -107,5 +151,6 @@ int main() { int doubled_value = multiply(current_value, 2); int final_result = doubled_value + global_value; - return used_by_foo(); + used_by_foo(); + ns_used_by_foo::x(); } diff --git a/compiler-rt/lib/asan/asan_mac.cpp b/compiler-rt/lib/asan/asan_mac.cpp index 1f3c79e..a68e362 100644 --- a/compiler-rt/lib/asan/asan_mac.cpp +++ b/compiler-rt/lib/asan/asan_mac.cpp @@ -130,6 +130,7 @@ typedef void* dispatch_queue_t; typedef void* dispatch_source_t; typedef u64 dispatch_time_t; typedef void (*dispatch_function_t)(void *block); +typedef void (*dispatch_apply_function_t)(void *, size_t); typedef void* (*worker_t)(void *block); typedef unsigned long dispatch_mach_reason; typedef void *dispatch_mach_msg_t; @@ -149,7 +150,11 @@ typedef void (^dispatch_mach_handler_t)(dispatch_mach_reason reason, // A wrapper for the ObjC blocks used to support libdispatch. typedef struct { void *block; - dispatch_function_t func; + union { + dispatch_function_t dispatch_func; + dispatch_apply_function_t dispatch_apply_func; + static_assert(sizeof(dispatch_func) == sizeof(dispatch_apply_func)); + }; u32 parent_tid; } asan_block_context_t; @@ -177,7 +182,7 @@ void asan_dispatch_call_block_and_release(void *block) { block, (void*)pthread_self()); asan_register_worker_thread(context->parent_tid, &stack); // Call the original dispatcher for the block. 
- context->func(context->block); + context->dispatch_func(context->block); asan_free(context, &stack); } @@ -193,7 +198,7 @@ asan_block_context_t *alloc_asan_context(void *ctxt, dispatch_function_t func, asan_block_context_t *asan_ctxt = (asan_block_context_t*) asan_malloc(sizeof(asan_block_context_t), stack); asan_ctxt->block = ctxt; - asan_ctxt->func = func; + asan_ctxt->dispatch_func = func; asan_ctxt->parent_tid = GetCurrentTidOrInvalid(); return asan_ctxt; } @@ -249,14 +254,17 @@ extern "C" void asan_dispatch_apply_f_work(void *context, size_t iteration) { GET_STACK_TRACE_THREAD; asan_block_context_t *asan_ctxt = (asan_block_context_t *)context; asan_register_worker_thread(asan_ctxt->parent_tid, &stack); - ((void (*)(void *, size_t))asan_ctxt->func)(asan_ctxt->block, iteration); + asan_ctxt->dispatch_apply_func(asan_ctxt->block, iteration); } INTERCEPTOR(void, dispatch_apply_f, size_t iterations, dispatch_queue_t queue, - void *ctxt, void (*work)(void *, size_t)) { + void *ctxt, dispatch_apply_function_t work) { GET_STACK_TRACE_THREAD; asan_block_context_t *asan_ctxt = - alloc_asan_context(ctxt, (dispatch_function_t)work, &stack); + (asan_block_context_t *)asan_malloc(sizeof(asan_block_context_t), &stack); + asan_ctxt->block = ctxt; + asan_ctxt->dispatch_apply_func = work; + asan_ctxt->parent_tid = GetCurrentTidOrInvalid(); REAL(dispatch_apply_f)(iterations, queue, (void *)asan_ctxt, asan_dispatch_apply_f_work); } diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp index 487fa49..77cba5f 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_access.cpp @@ -684,7 +684,7 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) { DCHECK(IsAppMem(addr + size - 1)); } if (!IsShadowMem(shadow_mem)) { - Printf("Bad shadow start addr: %p (%p)\n", shadow_mem, (void*)addr); + Printf("Bad shadow start addr: %p (%p)\n", (void*)shadow_mem, (void*)addr); 
DCHECK(IsShadowMem(shadow_mem)); } @@ -693,12 +693,12 @@ void MemoryAccessRangeT(ThreadState* thr, uptr pc, uptr addr, uptr size) { RawShadow* shadow_mem_end = shadow_mem + rounded_size / kShadowCell * kShadowCnt; if (!IsShadowMem(shadow_mem_end - 1)) { - Printf("Bad shadow end addr: %p (%p)\n", shadow_mem_end - 1, + Printf("Bad shadow end addr: %p (%p)\n", (void*)(shadow_mem_end - 1), (void*)(addr + size - 1)); Printf( "Shadow start addr (ok): %p (%p); size: 0x%zx; rounded_size: 0x%zx; " "kShadowMultiplier: %zx\n", - shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier); + (void*)shadow_mem, (void*)addr, size, rounded_size, kShadowMultiplier); DCHECK(IsShadowMem(shadow_mem_end - 1)); } #endif diff --git a/cross-project-tests/CMakeLists.txt b/cross-project-tests/CMakeLists.txt index b4b1f47..192db87 100644 --- a/cross-project-tests/CMakeLists.txt +++ b/cross-project-tests/CMakeLists.txt @@ -19,11 +19,12 @@ set(CROSS_PROJECT_TEST_DEPS FileCheck check-gdb-llvm-support count - llvm-dwarfdump + llvm-ar llvm-config + llvm-dwarfdump llvm-objdump - split-file not + split-file ) if ("clang" IN_LIST LLVM_ENABLE_PROJECTS) diff --git a/cross-project-tests/dtlto/ld-archive-thin.test b/cross-project-tests/dtlto/ld-archive-thin.test new file mode 100644 index 0000000..979da54 --- /dev/null +++ b/cross-project-tests/dtlto/ld-archive-thin.test @@ -0,0 +1,97 @@ +REQUIRES: ld.lld,llvm-ar + +## Test that a DTLTO link succeeds and outputs the expected set of files +## correctly when thin archives are present. + +RUN: rm -rf %t && split-file %s %t && cd %t + +## Compile bitcode. -O2 is required for cross-module importing. +RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c \ +RUN: foo.c bar.c dog.c cat.c start.c + +## Generate thin archives. +RUN: llvm-ar rcs foo.a foo.o --thin +## Create this bitcode thin archive in a subdirectory to test the expansion of +## the path to a bitcode file that is referenced using "..", e.g., in this case +## "../bar.o". 
+RUN: mkdir lib +RUN: llvm-ar rcs lib/bar.a bar.o --thin +## Create this bitcode thin archive with an absolute path entry containing "..". +RUN: llvm-ar rcs dog.a %t/lib/../dog.o --thin +## The bitcode member of cat.a will not be used in the link. +RUN: llvm-ar rcs cat.a cat.o --thin +RUN: llvm-ar rcs start.a start.o --thin + +## Link from a different directory to ensure that thin archive member paths are +## resolved correctly relative to the archive locations. +RUN: mkdir %t/out && cd %t/out + +RUN: %clang --target=x86_64-linux-gnu -flto=thin -fuse-ld=lld %t/foo.a %t/lib/bar.a ../start.a %t/cat.a \ +RUN: -Wl,--whole-archive ../dog.a \ +RUN: -fthinlto-distributor=%python \ +RUN: -Xthinlto-distributor=%llvm_src_root/utils/dtlto/local.py \ +RUN: -Wl,--save-temps -nostdlib -Werror + +## Check that the required output files have been created. +RUN: ls | sort | FileCheck %s + +## No files are expected before. +CHECK-NOT: {{.}} + +## JSON jobs description. +CHECK: {{^}}a.[[PID:[a-zA-Z0-9_]+]].dist-file.json{{$}} + +## Native output object files and individual summary index files. +CHECK: {{^}}bar.3.[[PID]].native.o{{$}} +CHECK: {{^}}bar.3.[[PID]].native.o.thinlto.bc{{$}} +CHECK: {{^}}dog.1.[[PID]].native.o{{$}} +CHECK: {{^}}dog.1.[[PID]].native.o.thinlto.bc{{$}} +CHECK: {{^}}foo.2.[[PID]].native.o{{$}} +CHECK: {{^}}foo.2.[[PID]].native.o.thinlto.bc{{$}} +CHECK: {{^}}start.4.[[PID]].native.o{{$}} +CHECK: {{^}}start.4.[[PID]].native.o.thinlto.bc{{$}} + +## No files are expected after. +CHECK-NOT: {{.}} + + +## It is important that cross-module inlining occurs for this test to show that Clang can +## successfully load the bitcode file dependencies recorded in the summary indices. +## Explicitly check that the expected importing has occurred. 
+ +RUN: llvm-dis start.4.*.native.o.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,BAR,START + +RUN: llvm-dis dog.1.*.native.o.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,BAR,DOG,START + +RUN: llvm-dis foo.2.*.native.o.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,BAR,START + +RUN: llvm-dis bar.3.*.native.o.thinlto.bc -o - | \ +RUN: FileCheck %s --check-prefixes=FOO,BAR,START + +FOO-DAG: foo.o +BAR-DAG: bar.o +DOG-DAG: dog.o +START-DAG: start.o + + +#--- foo.c +extern int bar(int), _start(int); +__attribute__((retain)) int foo(int x) { return x + bar(x) + _start(x); } + +#--- bar.c +extern int foo(int), _start(int); +__attribute__((retain)) int bar(int x) { return x + foo(x) + _start(x); } + +#--- dog.c +extern int foo(int), bar(int), _start(int); +__attribute__((retain)) int dog(int x) { return x + foo(x) + bar(x) + _start(x); } + +#--- cat.c +__attribute__((retain)) void cat(int x) {} + +#--- start.c +extern int foo(int), bar(int); +__attribute__((retain)) int _start(int x) { return x + foo(x) + bar(x); } diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py index 6a902bc..a3af729 100644 --- a/cross-project-tests/lit.cfg.py +++ b/cross-project-tests/lit.cfg.py @@ -107,6 +107,8 @@ lldb_path = llvm_config.use_llvm_tool("lldb", search_env="LLDB") if lldb_path is not None: config.available_features.add("lldb") +if llvm_config.use_llvm_tool("llvm-ar"): + config.available_features.add("llvm-ar") def configure_dexter_substitutions(): """Configure substitutions for host platform and return list of dependencies""" diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp index 64b57b6..0b8066e 100644 --- a/flang/examples/FeatureList/FeatureList.cpp +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -445,10 +445,9 @@ public: READ_FEATURE(ObjectDecl) READ_FEATURE(OldParameterStmt) READ_FEATURE(OmpAlignedClause) - READ_FEATURE(OmpBeginBlockDirective) + 
READ_FEATURE(OmpBeginDirective) READ_FEATURE(OmpBeginLoopDirective) READ_FEATURE(OmpBeginSectionsDirective) - READ_FEATURE(OmpBlockDirective) READ_FEATURE(OmpClause) READ_FEATURE(OmpClauseList) READ_FEATURE(OmpCriticalDirective) @@ -472,7 +471,7 @@ public: READ_FEATURE(OmpIteration) READ_FEATURE(OmpIterationOffset) READ_FEATURE(OmpIterationVector) - READ_FEATURE(OmpEndBlockDirective) + READ_FEATURE(OmpEndDirective) READ_FEATURE(OmpEndCriticalDirective) READ_FEATURE(OmpEndLoopDirective) READ_FEATURE(OmpEndSectionsDirective) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index cef57f1..2123561 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1402,10 +1402,8 @@ using OperatorSet = common::EnumSet<Operator, 32>; std::string ToString(Operator op); -template <typename... Ts, int Kind> -Operator OperationCode( - const evaluate::Operation<evaluate::LogicalOperation<Kind>, Ts...> &op) { - switch (op.derived().logicalOperator) { +template <int Kind> Operator OperationCode(const LogicalOperation<Kind> &op) { + switch (op.logicalOperator) { case common::LogicalOperator::And: return Operator::And; case common::LogicalOperator::Or: @@ -1420,10 +1418,10 @@ Operator OperationCode( return Operator::Unknown; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Relational<T>, Ts...> &op) { - switch (op.derived().opr) { +Operator OperationCode(const Relational<SomeType> &op); + +template <typename T> Operator OperationCode(const Relational<T> &op) { + switch (op.opr) { case common::RelationalOperator::LT: return Operator::Lt; case common::RelationalOperator::LE: @@ -1440,44 +1438,32 @@ Operator OperationCode( return Operator::Unknown; } -template <typename T, typename... 
Ts> -Operator OperationCode(const evaluate::Operation<evaluate::Add<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const Add<T> &op) { return Operator::Add; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Subtract<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const Subtract<T> &op) { return Operator::Sub; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Multiply<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const Multiply<T> &op) { return Operator::Mul; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Divide<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const Divide<T> &op) { return Operator::Div; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Power<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const Power<T> &op) { return Operator::Pow; } -template <typename T, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::RealToIntPower<T>, Ts...> &op) { +template <typename T> Operator OperationCode(const RealToIntPower<T> &op) { return Operator::Pow; } -template <typename T, common::TypeCategory C, typename... Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Convert<T, C>, Ts...> &op) { +template <typename T, common::TypeCategory C> +Operator OperationCode(const Convert<T, C> &op) { if constexpr (C == T::category) { return Operator::Resize; } else { @@ -1485,25 +1471,27 @@ Operator OperationCode( } } -template <typename T, typename... 
Ts> -Operator OperationCode( - const evaluate::Operation<evaluate::Extremum<T>, Ts...> &op) { - if (op.derived().ordering == evaluate::Ordering::Greater) { +template <typename T> Operator OperationCode(const Extremum<T> &op) { + if (op.ordering == Ordering::Greater) { return Operator::Max; } else { return Operator::Min; } } -template <typename T> Operator OperationCode(const evaluate::Constant<T> &x) { +template <typename T> Operator OperationCode(const Constant<T> &x) { return Operator::Constant; } +template <typename T> Operator OperationCode(const Designator<T> &x) { + return Operator::Identity; +} + template <typename T> Operator OperationCode(const T &) { return Operator::Unknown; } -Operator OperationCode(const evaluate::ProcedureDesignator &proc); +Operator OperationCode(const ProcedureDesignator &proc); } // namespace operation diff --git a/flang/include/flang/Lower/Bridge.h b/flang/include/flang/Lower/Bridge.h index a8c2bcf..3710e74 100644 --- a/flang/include/flang/Lower/Bridge.h +++ b/flang/include/flang/Lower/Bridge.h @@ -76,6 +76,7 @@ public: loweringOptions, envDefaults, languageFeatures, targetMachine, targetOptions, codeGenOptions); } + ~LoweringBridge(); //===--------------------------------------------------------------------===// // Getters @@ -174,6 +175,7 @@ private: const std::vector<Fortran::lower::EnvironmentDefault> &envDefaults; const Fortran::common::LanguageFeatureControl &languageFeatures; std::set<std::string> tempNames; + std::optional<mlir::DiagnosticEngine::HandlerID> diagHandlerID; }; } // namespace lower diff --git a/flang/include/flang/Lower/LoweringOptions.def b/flang/include/flang/Lower/LoweringOptions.def index 8135704..39f197d 100644 --- a/flang/include/flang/Lower/LoweringOptions.def +++ b/flang/include/flang/Lower/LoweringOptions.def @@ -74,5 +74,10 @@ ENUM_LOWERINGOPT(SkipExternalRttiDefinition, unsigned, 1, 0) /// If false, lower to the complex dialect of MLIR. /// On by default. 
ENUM_LOWERINGOPT(ComplexDivisionToRuntime, unsigned, 1, 1) + +/// When true, it registers MLIRDiagnosticsHandler for the duration +/// of the lowering pipeline. +ENUM_LOWERINGOPT(RegisterMLIRDiagnosticsHandler, unsigned, 1, 1) + #undef LOWERINGOPT #undef ENUM_LOWERINGOPT diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 9141443..68e402c 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -534,10 +534,8 @@ public: NODE(parser, OmpAtClause) NODE_ENUM(OmpAtClause, ActionTime) NODE_ENUM(OmpSeverityClause, Severity) - NODE(parser, OmpBeginBlockDirective) NODE(parser, OmpBeginLoopDirective) NODE(parser, OmpBeginSectionsDirective) - NODE(parser, OmpBlockDirective) static std::string GetNodeName(const llvm::omp::Directive &x) { return llvm::Twine("llvm::omp::Directive = ", llvm::omp::getOpenMPDirectiveName(x, llvm::omp::FallbackVersion)) @@ -586,7 +584,6 @@ public: NODE(parser, OmpDetachClause) NODE(parser, OmpDoacrossClause) NODE(parser, OmpDestroyClause) - NODE(parser, OmpEndBlockDirective) NODE(parser, OmpEndCriticalDirective) NODE(parser, OmpEndLoopDirective) NODE(parser, OmpEndSectionsDirective) @@ -708,6 +705,8 @@ public: NODE(parser, OpenMPDeclarativeAssumes) NODE(parser, OmpAssumeDirective) NODE(parser, OmpEndAssumeDirective) + NODE(parser, OmpBeginDirective) + NODE(parser, OmpEndDirective) NODE(parser, OpenMPAtomicConstruct) NODE(parser, OpenMPBlockConstruct) NODE(parser, OpenMPCancelConstruct) diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index 41c0442..fa0f765 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -68,11 +68,6 @@ struct DirectiveNameScope { return MakeName(x.source, llvm::omp::Directive::OMPD_nothing); } - static OmpDirectiveName GetOmpDirectiveName(const OmpBeginBlockDirective &x) { - auto 
&dir{std::get<OmpBlockDirective>(x.t)}; - return MakeName(dir.source, dir.v); - } - static OmpDirectiveName GetOmpDirectiveName(const OmpBeginLoopDirective &x) { auto &dir{std::get<OmpLoopDirective>(x.t)}; return MakeName(dir.source, dir.v); @@ -106,10 +101,8 @@ struct DirectiveNameScope { return GetOmpDirectiveName(x.v); } } else if constexpr (TupleTrait<T>) { - if constexpr (std::is_same_v<T, OpenMPAllocatorsConstruct> || - std::is_same_v<T, OpenMPAtomicConstruct> || - std::is_same_v<T, OpenMPDispatchConstruct>) { - return std::get<OmpDirectiveSpecification>(x.t).DirName(); + if constexpr (std::is_base_of_v<OmpBlockConstruct, T>) { + return std::get<OmpBeginDirective>(x.t).DirName(); } else if constexpr (std::is_same_v<T, OmpAssumeDirective> || std::is_same_v<T, OmpCriticalDirective> || std::is_same_v<T, OmpDeclareVariantDirective> || diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 137552f..7807db4 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3469,6 +3469,12 @@ WRAPPER_CLASS(PauseStmt, std::optional<StopCode>); // --- Common definitions +#define INHERITED_TUPLE_CLASS_BOILERPLATE(classname, basename) \ + using basename::basename; \ + classname(basename &&b) : basename(std::move(b)) {} \ + using TupleTrait = std::true_type; \ + BOILERPLATE(classname) + #define INHERITED_WRAPPER_CLASS_BOILERPLATE(classname, basename) \ BOILERPLATE(classname); \ using basename::basename; \ @@ -4750,6 +4756,33 @@ struct OmpDirectiveSpecification { t; }; +// OmpBeginDirective and OmpEndDirective are needed for semantic analysis, +// where some checks are done specifically for either the begin or the end +// directive. The structure of both is identical, but the diffent types +// allow to distinguish them in the type-based parse-tree visitor. 
+struct OmpBeginDirective : public OmpDirectiveSpecification { + INHERITED_TUPLE_CLASS_BOILERPLATE( + OmpBeginDirective, OmpDirectiveSpecification); +}; + +struct OmpEndDirective : public OmpDirectiveSpecification { + INHERITED_TUPLE_CLASS_BOILERPLATE(OmpEndDirective, OmpDirectiveSpecification); +}; + +// Common base class for block-associated constructs. +struct OmpBlockConstruct { + TUPLE_CLASS_BOILERPLATE(OmpBlockConstruct); + const OmpBeginDirective &BeginDir() const { + return std::get<OmpBeginDirective>(t); + } + const std::optional<OmpEndDirective> &EndDir() const { + return std::get<std::optional<OmpEndDirective>>(t); + } + + CharBlock source; + std::tuple<OmpBeginDirective, Block, std::optional<OmpEndDirective>> t; +}; + struct OmpMetadirectiveDirective { TUPLE_CLASS_BOILERPLATE(OmpMetadirectiveDirective); std::tuple<Verbatim, OmpClauseList> t; @@ -4854,12 +4887,6 @@ struct OpenMPSectionsConstruct { t; }; -// OpenMP directive beginning or ending a block -struct OmpBlockDirective { - WRAPPER_CLASS_BOILERPLATE(OmpBlockDirective, llvm::omp::Directive); - CharBlock source; -}; - struct OmpDeclareVariantDirective { TUPLE_CLASS_BOILERPLATE(OmpDeclareVariantDirective); CharBlock source; @@ -4984,12 +5011,9 @@ struct OpenMPExecutableAllocate { // ALLOCATORS [allocate-clause...] 
// block // [END ALLOCATORS] -struct OpenMPAllocatorsConstruct { - TUPLE_CLASS_BOILERPLATE(OpenMPAllocatorsConstruct); - CharBlock source; - std::tuple<OmpDirectiveSpecification, Block, - std::optional<OmpDirectiveSpecification>> - t; +struct OpenMPAllocatorsConstruct : public OmpBlockConstruct { + INHERITED_TUPLE_CLASS_BOILERPLATE( + OpenMPAllocatorsConstruct, OmpBlockConstruct); }; // 2.17.7 Atomic construct/2.17.8 Flush construct [OpenMP 5.0] @@ -5003,15 +5027,11 @@ struct OmpMemoryOrderClause { CharBlock source; }; -struct OpenMPAtomicConstruct { +struct OpenMPAtomicConstruct : public OmpBlockConstruct { llvm::omp::Clause GetKind() const; bool IsCapture() const; bool IsCompare() const; - TUPLE_CLASS_BOILERPLATE(OpenMPAtomicConstruct); - CharBlock source; - std::tuple<OmpDirectiveSpecification, Block, - std::optional<OmpDirectiveSpecification>> - t; + INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPAtomicConstruct, OmpBlockConstruct); // Information filled out during semantic checks to avoid duplication // of analyses. 
@@ -5075,12 +5095,8 @@ struct OpenMPDepobjConstruct { // nocontext-clause | // novariants-clause | // nowait-clause -struct OpenMPDispatchConstruct { - TUPLE_CLASS_BOILERPLATE(OpenMPDispatchConstruct); - CharBlock source; - std::tuple<OmpDirectiveSpecification, Block, - std::optional<OmpDirectiveSpecification>> - t; +struct OpenMPDispatchConstruct : public OmpBlockConstruct { + INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPDispatchConstruct, OmpBlockConstruct); }; // [4.5:162-165], [5.0:242-246], [5.1:275-279], [5.2:315-316], [6.0:498-500] @@ -5135,22 +5151,8 @@ struct OmpEndLoopDirective { CharBlock source; }; -struct OmpBeginBlockDirective { - TUPLE_CLASS_BOILERPLATE(OmpBeginBlockDirective); - std::tuple<OmpBlockDirective, OmpClauseList> t; - CharBlock source; -}; - -struct OmpEndBlockDirective { - TUPLE_CLASS_BOILERPLATE(OmpEndBlockDirective); - std::tuple<OmpBlockDirective, OmpClauseList> t; - CharBlock source; -}; - -struct OpenMPBlockConstruct { - TUPLE_CLASS_BOILERPLATE(OpenMPBlockConstruct); - std::tuple<OmpBeginBlockDirective, Block, std::optional<OmpEndBlockDirective>> - t; +struct OpenMPBlockConstruct : public OmpBlockConstruct { + INHERITED_TUPLE_CLASS_BOILERPLATE(OpenMPBlockConstruct, OmpBlockConstruct); }; // OpenMP directives enclosing do loop diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 171dd91..9c059b0 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1693,17 +1693,17 @@ struct ArgumentExtractor // to int(kind=4) for example. 
return (*this)(x.template operand<0>()); } else { - return std::make_pair(operation::OperationCode(x), + return std::make_pair(operation::OperationCode(x.derived()), OperationArgs(x, std::index_sequence_for<Os...>{})); } } template <typename T> Result operator()(const Designator<T> &x) const { - return {operation::Operator::Identity, {AsSomeExpr(x)}}; + return {operation::OperationCode(x), {AsSomeExpr(x)}}; } template <typename T> Result operator()(const Constant<T> &x) const { - return {operation::Operator::Identity, {AsSomeExpr(x)}}; + return {operation::OperationCode(x), {AsSomeExpr(x)}}; } template <typename... Rs> @@ -1793,6 +1793,10 @@ std::string operation::ToString(operation::Operator op) { llvm_unreachable("Unhandler operator"); } +operation::Operator operation::OperationCode(const Relational<SomeType> &op) { + return common::visit([](auto &&s) { return OperationCode(s); }, op.u); +} + operation::Operator operation::OperationCode(const ProcedureDesignator &proc) { Operator code{llvm::StringSwitch<Operator>(proc.GetName()) .Case("associated", Operator::Associated) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 1adfb96..75048c1 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -6707,27 +6707,30 @@ Fortran::lower::LoweringBridge::LoweringBridge( loweringOptions{loweringOptions}, envDefaults{envDefaults}, languageFeatures{languageFeatures} { // Register the diagnostic handler. 
- context.getDiagEngine().registerHandler([](mlir::Diagnostic &diag) { - llvm::raw_ostream &os = llvm::errs(); - switch (diag.getSeverity()) { - case mlir::DiagnosticSeverity::Error: - os << "error: "; - break; - case mlir::DiagnosticSeverity::Remark: - os << "info: "; - break; - case mlir::DiagnosticSeverity::Warning: - os << "warning: "; - break; - default: - break; - } - if (!mlir::isa<mlir::UnknownLoc>(diag.getLocation())) - os << diag.getLocation() << ": "; - os << diag << '\n'; - os.flush(); - return mlir::success(); - }); + if (loweringOptions.getRegisterMLIRDiagnosticsHandler()) { + diagHandlerID = + context.getDiagEngine().registerHandler([](mlir::Diagnostic &diag) { + llvm::raw_ostream &os = llvm::errs(); + switch (diag.getSeverity()) { + case mlir::DiagnosticSeverity::Error: + os << "error: "; + break; + case mlir::DiagnosticSeverity::Remark: + os << "info: "; + break; + case mlir::DiagnosticSeverity::Warning: + os << "warning: "; + break; + default: + break; + } + if (!mlir::isa<mlir::UnknownLoc>(diag.getLocation())) + os << diag.getLocation() << ": "; + os << diag << '\n'; + os.flush(); + return mlir::success(); + }); + } auto getPathLocation = [&semanticsContext, &context]() -> mlir::Location { std::optional<std::string> path; @@ -6769,6 +6772,11 @@ Fortran::lower::LoweringBridge::LoweringBridge( fir::setCommandline(*module, *cgOpts.RecordCommandLine); } +Fortran::lower::LoweringBridge::~LoweringBridge() { + if (diagHandlerID) + context.getDiagEngine().eraseHandler(*diagHandlerID); +} + void Fortran::lower::genCleanUpInRegionIfAny( mlir::Location loc, fir::FirOpBuilder &builder, mlir::Region &region, Fortran::lower::StatementContext &context) { diff --git a/flang/lib/Lower/Mangler.cpp b/flang/lib/Lower/Mangler.cpp index 1333e3fe..e1ae86a 100644 --- a/flang/lib/Lower/Mangler.cpp +++ b/flang/lib/Lower/Mangler.cpp @@ -224,8 +224,18 @@ std::string Fortran::lower::mangle::mangleName( assert(paramExpr && "derived type kind param not explicit"); 
std::optional<int64_t> init = Fortran::evaluate::ToInt64(paramValue->GetExplicit()); - assert(init && "derived type kind param is not constant"); - kinds.emplace_back(*init); + // TODO: put the assertion check back when parametrized derived types + // are supported: + // assert(init && "derived type kind param is not constant"); + // + // The init parameter above will require a FoldingContext for proper + // expression evaluation to an integer constant, otherwise the + // compiler may crash here (see example in issue #127424). + if (!init) { + TODO_NOLOC("parameterized derived types"); + } else { + kinds.emplace_back(*init); + } } } return fir::NameUniquer::doType(modules, procs, blockId, symbolName, kinds); diff --git a/flang/lib/Lower/OpenMP/Atomic.cpp b/flang/lib/Lower/OpenMP/Atomic.cpp index c9a6dba..ed0bff0 100644 --- a/flang/lib/Lower/OpenMP/Atomic.cpp +++ b/flang/lib/Lower/OpenMP/Atomic.cpp @@ -707,7 +707,7 @@ void Fortran::lower::omp::lowerAtomic( }; fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - auto &dirSpec = std::get<parser::OmpDirectiveSpecification>(construct.t); + const parser::OmpDirectiveSpecification &dirSpec = construct.BeginDir(); omp::List<omp::Clause> clauses = makeClauses(dirSpec.Clauses(), semaCtx); lower::StatementContext stmtCtx; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 575658f..d1efd8e 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -407,16 +407,9 @@ static void processHostEvalClauses(lower::AbstractConverter &converter, common::visit( common::visitors{ [&](const parser::OpenMPBlockConstruct &ompConstruct) { - const auto &beginDirective = - std::get<parser::OmpBeginBlockDirective>(ompConstruct.t); - beginClauseList = - &std::get<parser::OmpClauseList>(beginDirective.t); - if (auto &endDirective = - std::get<std::optional<parser::OmpEndBlockDirective>>( - ompConstruct.t)) { - endClauseList = - 
&std::get<parser::OmpClauseList>(endDirective->t); - } + beginClauseList = &ompConstruct.BeginDir().Clauses(); + if (auto &endSpec = ompConstruct.EndDir()) + endClauseList = &endSpec->Clauses(); }, [&](const parser::OpenMPLoopConstruct &ompConstruct) { const auto &beginDirective = @@ -3148,11 +3141,16 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps, simdReductionSyms); - // TODO: Support delayed privatization. - DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, symTable); - dsp.processStep1(); + DataSharingProcessor distributeItemDSP( + converter, semaCtx, distributeItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/false, + /*useDelayedPrivatization=*/true, symTable); + distributeItemDSP.processStep1(&distributeClauseOps); + + DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, symTable); + simdItemDSP.processStep1(&simdClauseOps); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. @@ -3163,13 +3161,15 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( // Operation creation. EntryBlockArgs distributeArgs; - // TODO: Add private syms and vars. + distributeArgs.priv.syms = distributeItemDSP.getDelayedPrivSymbols(); + distributeArgs.priv.vars = distributeClauseOps.privateVars; auto distributeOp = genWrapperOp<mlir::omp::DistributeOp>( converter, loc, distributeClauseOps, distributeArgs); distributeOp.setComposite(/*val=*/true); EntryBlockArgs simdArgs; - // TODO: Add private syms and vars. 
+ simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols(); + simdArgs.priv.vars = simdClauseOps.privateVars; simdArgs.reduction.syms = simdReductionSyms; simdArgs.reduction.vars = simdClauseOps.reductionVars; auto simdOp = @@ -3179,7 +3179,7 @@ static mlir::omp::DistributeOp genCompositeDistributeSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{distributeOp, distributeArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_distribute_simd, dsp); + llvm::omp::Directive::OMPD_distribute_simd, simdItemDSP); return distributeOp; } @@ -3203,11 +3203,16 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genSimdClauses(converter, semaCtx, simdItem->clauses, loc, simdClauseOps, simdReductionSyms); - // TODO: Support delayed privatization. - DataSharingProcessor dsp(converter, semaCtx, simdItem->clauses, eval, - /*shouldCollectPreDeterminedSymbols=*/true, - /*useDelayedPrivatization=*/false, symTable); - dsp.processStep1(); + DataSharingProcessor wsloopItemDSP( + converter, semaCtx, doItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/false, + /*useDelayedPrivatization=*/true, symTable); + wsloopItemDSP.processStep1(&wsloopClauseOps); + + DataSharingProcessor simdItemDSP(converter, semaCtx, simdItem->clauses, eval, + /*shouldCollectPreDeterminedSymbols=*/true, + /*useDelayedPrivatization=*/true, symTable); + simdItemDSP.processStep1(&simdClauseOps); // Pass the innermost leaf construct's clauses because that's where COLLAPSE // is placed by construct decomposition. @@ -3218,7 +3223,8 @@ static mlir::omp::WsloopOp genCompositeDoSimd( // Operation creation. EntryBlockArgs wsloopArgs; - // TODO: Add private syms and vars. 
+ wsloopArgs.priv.syms = wsloopItemDSP.getDelayedPrivSymbols(); + wsloopArgs.priv.vars = wsloopClauseOps.privateVars; wsloopArgs.reduction.syms = wsloopReductionSyms; wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars; auto wsloopOp = genWrapperOp<mlir::omp::WsloopOp>( @@ -3226,7 +3232,8 @@ static mlir::omp::WsloopOp genCompositeDoSimd( wsloopOp.setComposite(/*val=*/true); EntryBlockArgs simdArgs; - // TODO: Add private syms and vars. + simdArgs.priv.syms = simdItemDSP.getDelayedPrivSymbols(); + simdArgs.priv.vars = simdClauseOps.privateVars; simdArgs.reduction.syms = simdReductionSyms; simdArgs.reduction.vars = simdClauseOps.reductionVars; auto simdOp = @@ -3236,7 +3243,7 @@ static mlir::omp::WsloopOp genCompositeDoSimd( genLoopNestOp(converter, symTable, semaCtx, eval, loc, queue, simdItem, loopNestClauseOps, iv, {{wsloopOp, wsloopArgs}, {simdOp, simdArgs}}, - llvm::omp::Directive::OMPD_do_simd, dsp); + llvm::omp::Directive::OMPD_do_simd, simdItemDSP); return wsloopOp; } @@ -3719,25 +3726,16 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, semantics::SemanticsContext &semaCtx, lower::pft::Evaluation &eval, const parser::OpenMPBlockConstruct &blockConstruct) { - const auto &beginBlockDirective = - std::get<parser::OmpBeginBlockDirective>(blockConstruct.t); - mlir::Location currentLocation = - converter.genLocation(beginBlockDirective.source); - const auto origDirective = - std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v; - List<Clause> clauses = makeClauses( - std::get<parser::OmpClauseList>(beginBlockDirective.t), semaCtx); - - if (const auto &endBlockDirective = - std::get<std::optional<parser::OmpEndBlockDirective>>( - blockConstruct.t)) { - clauses.append(makeClauses( - std::get<parser::OmpClauseList>(endBlockDirective->t), semaCtx)); - } - - assert(llvm::omp::blockConstructSet.test(origDirective) && + const parser::OmpDirectiveSpecification &beginSpec = + blockConstruct.BeginDir(); + List<Clause> clauses 
= makeClauses(beginSpec.Clauses(), semaCtx); + if (auto &endSpec = blockConstruct.EndDir()) + clauses.append(makeClauses(endSpec->Clauses(), semaCtx)); + + llvm::omp::Directive directive = beginSpec.DirId(); + assert(llvm::omp::blockConstructSet.test(directive) && "Expected block construct"); - (void)origDirective; + mlir::Location currentLocation = converter.genLocation(beginSpec.source); for (const Clause &clause : clauses) { mlir::Location clauseLocation = converter.genLocation(clause.source); @@ -3780,13 +3778,9 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, } } - llvm::omp::Directive directive = - std::get<parser::OmpBlockDirective>(beginBlockDirective.t).v; - const parser::CharBlock &source = - std::get<parser::OmpBlockDirective>(beginBlockDirective.t).source; ConstructQueue queue{ buildConstructQueue(converter.getFirOpBuilder().getModule(), semaCtx, - eval, source, directive, clauses)}; + eval, beginSpec.source, directive, clauses)}; genOMPDispatch(converter, symTable, semaCtx, eval, currentLocation, queue, queue.begin()); } @@ -4074,8 +4068,7 @@ bool Fortran::lower::isOpenMPTargetConstruct( const parser::OpenMPConstruct &omp) { llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; if (const auto *block = std::get_if<parser::OpenMPBlockConstruct>(&omp.u)) { - const auto &begin = std::get<parser::OmpBeginBlockDirective>(block->t); - dir = std::get<parser::OmpBlockDirective>(begin.t).v; + dir = block->BeginDir().DirId(); } else if (const auto *loop = std::get_if<parser::OpenMPLoopConstruct>(&omp.u)) { const auto &begin = std::get<parser::OmpBeginLoopDirective>(loop->t); diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 9b73dc8..7daba33 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1381,16 +1381,41 @@ TYPE_PARSER(sourced(construct<OmpLoopDirective>(first( TYPE_PARSER(sourced(construct<OmpBeginLoopDirective>( 
sourced(Parser<OmpLoopDirective>{}), Parser<OmpClauseList>{}))) +static inline constexpr auto IsDirective(llvm::omp::Directive dir) { + return [dir](const OmpDirectiveName &name) -> bool { return dir == name.v; }; +} + +struct OmpBeginDirectiveParser { + using resultType = OmpDirectiveSpecification; + + constexpr OmpBeginDirectiveParser(llvm::omp::Directive dir) : dir_(dir) {} + + std::optional<resultType> Parse(ParseState &state) const { + auto &&p{predicated(Parser<OmpDirectiveName>{}, IsDirective(dir_)) >= + Parser<OmpDirectiveSpecification>{}}; + return p.Parse(state); + } + +private: + llvm::omp::Directive dir_; +}; + struct OmpEndDirectiveParser { using resultType = OmpDirectiveSpecification; constexpr OmpEndDirectiveParser(llvm::omp::Directive dir) : dir_(dir) {} std::optional<resultType> Parse(ParseState &state) const { - if ((startOmpLine >> "END"_sptok).Parse(state)) { - auto &&dirSpec{Parser<OmpDirectiveSpecification>{}.Parse(state)}; - if (dirSpec && dirSpec->DirId() == dir_) { - return std::move(dirSpec); + if (startOmpLine.Parse(state)) { + if (auto endToken{verbatim("END"_sptok).Parse(state)}) { + if (auto &&dirSpec{OmpBeginDirectiveParser(dir_).Parse(state)}) { + // Extend the "source" on both the OmpDirectiveName and the + // OmpDirectiveSpecification. 
+ CharBlock &nameSource{std::get<OmpDirectiveName>(dirSpec->t).source}; + nameSource.ExtendToCover(endToken->source); + dirSpec->source.ExtendToCover(endToken->source); + return std::move(*dirSpec); + } } } return std::nullopt; @@ -1400,57 +1425,67 @@ private: llvm::omp::Directive dir_; }; -struct OmpAllocatorsConstructParser { - using resultType = OpenMPAllocatorsConstruct; +struct OmpStatementConstructParser { + using resultType = OmpBlockConstruct; + + constexpr OmpStatementConstructParser(llvm::omp::Directive dir) : dir_(dir) {} std::optional<resultType> Parse(ParseState &state) const { - auto dirSpec{Parser<OmpDirectiveSpecification>{}.Parse(state)}; - if (!dirSpec || dirSpec->DirId() != llvm::omp::Directive::OMPD_allocators) { - return std::nullopt; - } + if (auto begin{OmpBeginDirectiveParser(dir_).Parse(state)}) { + Block body; + if (auto stmt{attempt(Parser<ExecutionPartConstruct>{}).Parse(state)}) { + body.emplace_back(std::move(*stmt)); + } + // Allow empty block. Check for this in semantics. - // This should be an allocate-stmt. That will be checked in semantics. - Block block; - if (auto stmt{attempt(Parser<ExecutionPartConstruct>{}).Parse(state)}) { - block.emplace_back(std::move(*stmt)); + auto end{maybe(OmpEndDirectiveParser{dir_}).Parse(state)}; + return OmpBlockConstruct{OmpBeginDirective(std::move(*begin)), + std::move(body), + llvm::transformOptional(std::move(*end), + [](auto &&s) { return OmpEndDirective(std::move(s)); })}; } - // Allow empty block. Check for this in semantics. 
- - auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_allocators}}; - return OpenMPAllocatorsConstruct{ - std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + return std::nullopt; } + +private: + llvm::omp::Directive dir_; }; -TYPE_PARSER(sourced( // - construct<OpenMPAllocatorsConstruct>( - "ALLOCATORS"_tok >= OmpAllocatorsConstructParser{}))) +struct OmpBlockConstructParser { + using resultType = OmpBlockConstruct; -struct OmpDispatchConstructParser { - using resultType = OpenMPDispatchConstruct; + constexpr OmpBlockConstructParser(llvm::omp::Directive dir) : dir_(dir) {} std::optional<resultType> Parse(ParseState &state) const { - auto dirSpec{Parser<OmpDirectiveSpecification>{}.Parse(state)}; - if (!dirSpec || dirSpec->DirId() != llvm::omp::Directive::OMPD_dispatch) { - return std::nullopt; - } - - // This should be a function call. That will be checked in semantics. - Block block; - if (auto stmt{attempt(Parser<ExecutionPartConstruct>{}).Parse(state)}) { - block.emplace_back(std::move(*stmt)); + if (auto &&begin{OmpBeginDirectiveParser(dir_).Parse(state)}) { + if (auto &&body{attempt(StrictlyStructuredBlockParser{}).Parse(state)}) { + // Try strictly-structured block with an optional end-directive + auto end{maybe(OmpEndDirectiveParser{dir_}).Parse(state)}; + return OmpBlockConstruct{OmpBeginDirective(std::move(*begin)), + std::move(*body), + llvm::transformOptional(std::move(*end), + [](auto &&s) { return OmpEndDirective(std::move(s)); })}; + } else if (auto &&body{ + attempt(LooselyStructuredBlockParser{}).Parse(state)}) { + // Try loosely-structured block with a mandatory end-directive + if (auto end{OmpEndDirectiveParser{dir_}.Parse(state)}) { + return OmpBlockConstruct{OmpBeginDirective(std::move(*begin)), + std::move(*body), OmpEndDirective{std::move(*end)}}; + } + } } - // Allow empty block. Check for this in semantics. 
- - auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_dispatch}}; - return OpenMPDispatchConstruct{ - std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + return std::nullopt; } + +private: + llvm::omp::Directive dir_; }; -TYPE_PARSER(sourced( // - construct<OpenMPDispatchConstruct>( - "DISPATCH"_tok >= OmpDispatchConstructParser{}))) +TYPE_PARSER(sourced(construct<OpenMPAllocatorsConstruct>( + OmpStatementConstructParser{llvm::omp::Directive::OMPD_allocators}))) + +TYPE_PARSER(sourced(construct<OpenMPDispatchConstruct>( + OmpStatementConstructParser{llvm::omp::Directive::OMPD_dispatch}))) // Parser for an arbitrary OpenMP ATOMIC construct. // @@ -1515,8 +1550,10 @@ struct OmpAtomicConstructParser { } } recursing_ = false; - return OpenMPAtomicConstruct{ - std::move(*dirSpec), std::move(tail.first), std::move(tail.second)}; + return OpenMPAtomicConstruct{OmpBeginDirective(std::move(*dirSpec)), + std::move(tail.first), + llvm::transformOptional(std::move(tail.second), + [](auto &&s) { return OmpEndDirective(std::move(s)); })}; } recursing_ = false; @@ -1617,10 +1654,6 @@ TYPE_PARSER(sourced( // predicated(OmpDirectiveNameParser{}, IsSimpleStandalone) >= Parser<OmpDirectiveSpecification>{}))) -static inline constexpr auto IsDirective(llvm::omp::Directive dir) { - return [dir](const OmpDirectiveName &name) -> bool { return dir == name.v; }; -} - TYPE_PARSER(sourced( // construct<OpenMPFlushConstruct>( predicated(OmpDirectiveNameParser{}, @@ -1671,40 +1704,6 @@ TYPE_PARSER( Parser<OpenMPInteropConstruct>{})) / endOfLine) -// Directive names (of non-block constructs) whose prefix is a name of -// a block-associated construct. We need to exclude them from the block -// directive parser below to avoid parsing parts of them. 
-static constexpr auto StandaloneDirectiveLookahead{// - "TARGET ENTER DATA"_sptok || "TARGET_ENTER_DATA"_sptok || // - "TARGET EXIT DATA"_sptok || "TARGET_EXIT"_sptok || // - "TARGET UPDATE"_sptok || "TARGET_UPDATE"_sptok}; - -// Directives enclosing structured-block -TYPE_PARSER((!StandaloneDirectiveLookahead) >= - construct<OmpBlockDirective>(first( - "MASKED" >> pure(llvm::omp::Directive::OMPD_masked), - "MASTER" >> pure(llvm::omp::Directive::OMPD_master), - "ORDERED" >> pure(llvm::omp::Directive::OMPD_ordered), - "PARALLEL MASKED" >> pure(llvm::omp::Directive::OMPD_parallel_masked), - "PARALLEL MASTER" >> pure(llvm::omp::Directive::OMPD_parallel_master), - "PARALLEL WORKSHARE" >> - pure(llvm::omp::Directive::OMPD_parallel_workshare), - "PARALLEL" >> pure(llvm::omp::Directive::OMPD_parallel), - "SCOPE" >> pure(llvm::omp::Directive::OMPD_scope), - "SINGLE" >> pure(llvm::omp::Directive::OMPD_single), - "TARGET DATA" >> pure(llvm::omp::Directive::OMPD_target_data), - "TARGET_DATA" >> pure(llvm::omp::Directive::OMPD_target_data), - "TARGET PARALLEL" >> pure(llvm::omp::Directive::OMPD_target_parallel), - "TARGET TEAMS" >> pure(llvm::omp::Directive::OMPD_target_teams), - "TARGET" >> pure(llvm::omp::Directive::OMPD_target), - "TASK"_id >> pure(llvm::omp::Directive::OMPD_task), - "TASKGROUP" >> pure(llvm::omp::Directive::OMPD_taskgroup), - "TEAMS" >> pure(llvm::omp::Directive::OMPD_teams), - "WORKSHARE" >> pure(llvm::omp::Directive::OMPD_workshare)))) - -TYPE_PARSER(sourced(construct<OmpBeginBlockDirective>( - sourced(Parser<OmpBlockDirective>{}), Parser<OmpClauseList>{}))) - TYPE_PARSER(construct<OmpInitializerProc>(Parser<ProcedureDesignator>{}, parenthesized(many(maybe(","_tok) >> Parser<ActualArgSpec>{})))) @@ -1854,12 +1853,27 @@ TYPE_PARSER(sourced( block, maybe(Parser<OmpEndAssumeDirective>{} / endOmpLine)))) // Block Construct +#define MakeBlockConstruct(dir) \ + construct<OpenMPBlockConstruct>(OmpBlockConstructParser{dir}) TYPE_PARSER( // - 
construct<OpenMPBlockConstruct>(Parser<OmpBeginBlockDirective>{}, - StrictlyStructuredBlockParser{}, - maybe(Parser<OmpEndBlockDirective>{})) || - construct<OpenMPBlockConstruct>(Parser<OmpBeginBlockDirective>{}, - LooselyStructuredBlockParser{}, Parser<OmpEndBlockDirective>{})) + MakeBlockConstruct(llvm::omp::Directive::OMPD_masked) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_master) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_ordered) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_parallel_masked) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_parallel_master) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_parallel_workshare) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_parallel) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_scope) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_single) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_target_data) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_target_parallel) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_target_teams) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_target) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_task) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_taskgroup) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_teams) || + MakeBlockConstruct(llvm::omp::Directive::OMPD_workshare)) +#undef MakeBlockConstruct // OMP SECTIONS Directive TYPE_PARSER(construct<OmpSectionsDirective>(first( @@ -1914,12 +1928,6 @@ TYPE_CONTEXT_PARSER("OpenMP construct"_en_US, construct<OpenMPConstruct>(Parser<OpenMPAssumeConstruct>{}), construct<OpenMPConstruct>(Parser<OpenMPCriticalConstruct>{})))) -// END OMP Block directives -TYPE_PARSER( - startOmpLine >> sourced(construct<OmpEndBlockDirective>( - sourced("END"_tok >> Parser<OmpBlockDirective>{}), - Parser<OmpClauseList>{}))) - // END OMP Loop directives TYPE_PARSER( startOmpLine >> sourced(construct<OmpEndLoopDirective>( diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp 
index 7b46c9f4..cb30939 100644 --- a/flang/lib/Parser/parse-tree.cpp +++ b/flang/lib/Parser/parse-tree.cpp @@ -322,7 +322,7 @@ std::string OmpTraitSetSelectorName::ToString() const { } llvm::omp::Clause OpenMPAtomicConstruct::GetKind() const { - auto &dirSpec{std::get<OmpDirectiveSpecification>(t)}; + const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)}; for (auto &clause : dirSpec.Clauses().v) { switch (clause.Id()) { case llvm::omp::Clause::OMPC_read: @@ -337,14 +337,14 @@ llvm::omp::Clause OpenMPAtomicConstruct::GetKind() const { } bool OpenMPAtomicConstruct::IsCapture() const { - auto &dirSpec{std::get<OmpDirectiveSpecification>(t)}; + const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)}; return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) { return clause.Id() == llvm::omp::Clause::OMPC_capture; }); } bool OpenMPAtomicConstruct::IsCompare() const { - auto &dirSpec{std::get<OmpDirectiveSpecification>(t)}; + const OmpDirectiveSpecification &dirSpec{std::get<OmpBeginDirective>(t)}; return llvm::any_of(dirSpec.Clauses().v, [](auto &clause) { return clause.Id() == llvm::omp::Clause::OMPC_compare; }); diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index ef209ff..44f31ca 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2513,87 +2513,39 @@ public: } } void Unparse(const OmpObjectList &x) { Walk(x.v, ","); } - void Unparse(const OmpBlockDirective &x) { - switch (x.v) { - case llvm::omp::Directive::OMPD_masked: - Word("MASKED"); - break; - case llvm::omp::Directive::OMPD_master: - Word("MASTER"); - break; - case llvm::omp::Directive::OMPD_ordered: - Word("ORDERED "); - break; - case llvm::omp::Directive::OMPD_parallel_masked: - Word("PARALLEL MASKED"); - break; - case llvm::omp::Directive::OMPD_parallel_master: - Word("PARALLEL MASTER"); - break; - case llvm::omp::Directive::OMPD_parallel_workshare: - Word("PARALLEL WORKSHARE "); - break; - case 
llvm::omp::Directive::OMPD_parallel: - Word("PARALLEL "); - break; - case llvm::omp::Directive::OMPD_scope: - Word("SCOPE "); - break; - case llvm::omp::Directive::OMPD_single: - Word("SINGLE "); - break; - case llvm::omp::Directive::OMPD_target_data: - Word("TARGET DATA "); - break; - case llvm::omp::Directive::OMPD_target_parallel: - Word("TARGET PARALLEL "); - break; - case llvm::omp::Directive::OMPD_target_teams: - Word("TARGET TEAMS "); - break; - case llvm::omp::Directive::OMPD_target: - Word("TARGET "); - break; - case llvm::omp::Directive::OMPD_taskgroup: - Word("TASKGROUP "); - break; - case llvm::omp::Directive::OMPD_task: - Word("TASK "); - break; - case llvm::omp::Directive::OMPD_teams: - Word("TEAMS "); - break; - case llvm::omp::Directive::OMPD_workshare: - Word("WORKSHARE "); - break; - default: - // Nothing to be done - break; - } - } void Unparse(const common::OmpMemoryOrderType &x) { Word(ToUpperCaseLetters(common::EnumToString(x))); } - template <typename Construct> void UnparseBlockConstruct(const Construct &x) { + void Unparse(const OmpBeginDirective &x) { BeginOpenMP(); Word("!$OMP "); - Walk(std::get<OmpDirectiveSpecification>(x.t)); + Walk(static_cast<const OmpDirectiveSpecification &>(x)); Put("\n"); EndOpenMP(); + } + + void Unparse(const OmpEndDirective &x) { + BeginOpenMP(); + Word("!$OMP END "); + Walk(static_cast<const OmpDirectiveSpecification &>(x)); + Put("\n"); + EndOpenMP(); + } + + void Unparse(const OmpBlockConstruct &x) { + Walk(std::get<OmpBeginDirective>(x.t)); Walk(std::get<Block>(x.t), ""); - if (auto &end{std::get<std::optional<OmpDirectiveSpecification>>(x.t)}) { - BeginOpenMP(); - Word("!$OMP END "); + if (auto &end{std::get<std::optional<OmpEndDirective>>(x.t)}) { Walk(*end); + } else { Put("\n"); - EndOpenMP(); } } void Unparse(const OpenMPAtomicConstruct &x) { // - UnparseBlockConstruct(x); + Unparse(static_cast<const OmpBlockConstruct &>(x)); } void Unparse(const OpenMPExecutableAllocate &x) { @@ -2624,7 +2576,7 @@ 
public: EndOpenMP(); } void Unparse(const OpenMPAllocatorsConstruct &x) { // - UnparseBlockConstruct(x); + Unparse(static_cast<const OmpBlockConstruct &>(x)); } void Unparse(const OmpAssumeDirective &x) { BeginOpenMP(); @@ -2764,7 +2716,7 @@ public: EndOpenMP(); } void Unparse(const OpenMPDispatchConstruct &x) { // - UnparseBlockConstruct(x); + Unparse(static_cast<const OmpBlockConstruct &>(x)); } void Unparse(const OpenMPRequiresConstruct &y) { BeginOpenMP(); @@ -2897,19 +2849,7 @@ public: EndOpenMP(); } void Unparse(const OpenMPBlockConstruct &x) { - BeginOpenMP(); - Word("!$OMP "); - Walk(std::get<OmpBeginBlockDirective>(x.t)); - Put("\n"); - EndOpenMP(); - Walk(std::get<Block>(x.t), ""); - if (auto &&end{std::get<std::optional<OmpEndBlockDirective>>(x.t)}) { - BeginOpenMP(); - Word("!$OMP END "); - Walk(*end); - Put("\n"); - EndOpenMP(); - } + Unparse(static_cast<const OmpBlockConstruct &>(x)); } void Unparse(const OpenMPLoopConstruct &x) { BeginOpenMP(); diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 333fad0..eca50c1 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -1106,12 +1106,11 @@ void OmpStructureChecker::CheckAtomicRead( // of the following forms: // v = x // v => x - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; auto &block{std::get<parser::Block>(x.t)}; // Read cannot be conditional or have a capture statement. if (x.IsCompare() || x.IsCapture()) { - context_.Say(dirSpec.source, + context_.Say(x.BeginDir().source, "ATOMIC READ cannot have COMPARE or CAPTURE clauses"_err_en_US); return; } @@ -1142,12 +1141,11 @@ void OmpStructureChecker::CheckAtomicRead( void OmpStructureChecker::CheckAtomicWrite( const parser::OpenMPAtomicConstruct &x) { - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; auto &block{std::get<parser::Block>(x.t)}; // Write cannot be conditional or have a capture statement. 
if (x.IsCompare() || x.IsCapture()) { - context_.Say(dirSpec.source, + context_.Say(x.BeginDir().source, "ATOMIC WRITE cannot have COMPARE or CAPTURE clauses"_err_en_US); return; } @@ -1235,7 +1233,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { } }}; - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; auto &dir{std::get<parser::OmpDirectiveName>(dirSpec.t)}; PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_atomic); llvm::omp::Clause kind{x.GetKind()}; diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index b82e2f7..59d57a2 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -18,6 +18,7 @@ #include "flang/Common/idioms.h" #include "flang/Common/visit.h" #include "flang/Parser/char-block.h" +#include "flang/Parser/openmp-utils.h" #include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parse-tree.h" #include "flang/Parser/tools.h" @@ -196,14 +197,9 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { common::visitors{ // Allow `!$OMP ORDERED SIMD` [&](const parser::OpenMPBlockConstruct &c) { - const auto &beginBlockDir{ - std::get<parser::OmpBeginBlockDirective>(c.t)}; - const auto &beginDir{ - std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - if (beginDir.v == llvm::omp::Directive::OMPD_ordered) { - const auto &clauses{ - std::get<parser::OmpClauseList>(beginBlockDir.t)}; - for (const auto &clause : clauses.v) { + const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()}; + if (beginSpec.DirId() == llvm::omp::Directive::OMPD_ordered) { + for (const auto &clause : beginSpec.Clauses().v) { if (std::get_if<parser::OmpClause::Simd>(&clause.u)) { eligibleSIMD = true; break; @@ -247,7 +243,7 @@ void OmpStructureChecker::CheckSIMDNest(const parser::OpenMPConstruct &c) { }, c.u); if (!eligibleSIMD) { - 
context_.Say(parser::FindSourceLocation(c), + context_.Say(parser::omp::GetOmpDirectiveName(c).source, "The only OpenMP constructs that can be encountered during execution " "of a 'SIMD' region are the `ATOMIC` construct, the `LOOP` construct, " "the `SIMD` construct, the `SCAN` construct and the `ORDERED` " diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 20a86e9..a9c56c3 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -498,7 +498,7 @@ template <typename Checker> struct DirectiveSpellingVisitor { template <typename... Ts> static const parser::OmpDirectiveName &GetDirName( const std::tuple<Ts...> &t) { - return std::get<parser::OmpDirectiveSpecification>(t).DirName(); + return std::get<parser::OmpBeginDirective>(t).DirName(); } bool Pre(const parser::OmpSectionsDirective &x) { @@ -588,12 +588,14 @@ template <typename Checker> struct DirectiveSpellingVisitor { checker_(std::get<parser::Verbatim>(x.t).source, Directive::OMPD_requires); return false; } - - bool Pre(const parser::OmpBlockDirective &x) { - checker_(x.source, x.v); + bool Pre(const parser::OmpBeginDirective &x) { + checker_(x.DirName().source, x.DirId()); + return false; + } + bool Pre(const parser::OmpEndDirective &x) { + checker_(x.DirName().source, x.DirId()); return false; } - bool Pre(const parser::OmpLoopDirective &x) { checker_(x.source, x.v); return false; @@ -726,22 +728,22 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) { // 2.12.5 Target Construct Restriction bool eligibleTarget{true}; llvm::omp::Directive ineligibleTargetDir; + parser::CharBlock source; common::visit( common::visitors{ [&](const parser::OpenMPBlockConstruct &c) { - const auto &beginBlockDir{ - std::get<parser::OmpBeginBlockDirective>(c.t)}; - const auto &beginDir{ - std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - if (beginDir.v == 
llvm::omp::Directive::OMPD_target_data) { + const parser::OmpDirectiveSpecification &beginSpec{c.BeginDir()}; + source = beginSpec.DirName().source; + if (beginSpec.DirId() == llvm::omp::Directive::OMPD_target_data) { eligibleTarget = false; - ineligibleTargetDir = beginDir.v; + ineligibleTargetDir = beginSpec.DirId(); } }, [&](const parser::OpenMPStandaloneConstruct &c) { common::visit( common::visitors{ [&](const parser::OpenMPSimpleStandaloneConstruct &c) { + source = c.v.DirName().source; switch (llvm::omp::Directive dirId{c.v.DirId()}) { case llvm::omp::Directive::OMPD_target_update: case llvm::omp::Directive::OMPD_target_enter_data: @@ -762,6 +764,7 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) { std::get<parser::OmpBeginLoopDirective>(c.t)}; const auto &beginDir{ std::get<parser::OmpLoopDirective>(beginLoopDir.t)}; + source = beginLoopDir.source; if (llvm::omp::allTargetSet.test(beginDir.v)) { eligibleTarget = false; ineligibleTargetDir = beginDir.v; @@ -771,8 +774,7 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) { }, c.u); if (!eligibleTarget) { - context_.Warn(common::UsageWarning::OpenMPUsage, - parser::FindSourceLocation(c), + context_.Warn(common::UsageWarning::OpenMPUsage, source, "If %s directive is nested inside TARGET region, the behaviour is unspecified"_port_en_US, parser::ToUpperCaseLetters( getDirectiveName(ineligibleTargetDir).str())); @@ -780,25 +782,18 @@ void OmpStructureChecker::CheckTargetNest(const parser::OpenMPConstruct &c) { } void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { - const auto &beginBlockDir{std::get<parser::OmpBeginBlockDirective>(x.t)}; - const auto &endBlockDir{ - std::get<std::optional<parser::OmpEndBlockDirective>>(x.t)}; - const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; + const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; + const std::optional<parser::OmpEndDirective> &endSpec{x.EndDir()}; 
const parser::Block &block{std::get<parser::Block>(x.t)}; - if (endBlockDir) { - const auto &endDir{std::get<parser::OmpBlockDirective>(endBlockDir->t)}; - CheckMatching<parser::OmpBlockDirective>(beginDir, endDir); - } - - PushContextAndClauseSets(beginDir.source, beginDir.v); + PushContextAndClauseSets(beginSpec.DirName().source, beginSpec.DirId()); if (llvm::omp::allTargetSet.test(GetContext().directive)) { EnterDirectiveNest(TargetNest); } if (CurrentDirectiveIsNested()) { if (llvm::omp::bottomTeamsSet.test(GetContextParent().directive)) { - HasInvalidTeamsNesting(beginDir.v, beginDir.source); + HasInvalidTeamsNesting(beginSpec.DirId(), beginSpec.source); } if (GetContext().directive == llvm::omp::Directive::OMPD_master) { CheckMasterNesting(x); @@ -807,7 +802,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { // region or a target region. if (GetContext().directive == llvm::omp::Directive::OMPD_teams && GetContextParent().directive != llvm::omp::Directive::OMPD_target) { - context_.Say(parser::FindSourceLocation(x), + context_.Say(x.BeginDir().DirName().source, "%s region can only be strictly nested within the implicit parallel " "region or TARGET region"_err_en_US, ContextDirectiveAsFortran()); @@ -824,12 +819,12 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { } } - CheckNoBranching(block, beginDir.v, beginDir.source); + CheckNoBranching(block, beginSpec.DirId(), beginSpec.source); // Target block constructs are target device constructs. Keep track of // whether any such construct has been visited to later check that REQUIRES // directives for target-related options don't appear after them. 
- if (llvm::omp::allTargetSet.test(beginDir.v)) { + if (llvm::omp::allTargetSet.test(beginSpec.DirId())) { deviceConstructFound_ = true; } @@ -839,8 +834,8 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { bool foundNowait{false}; parser::CharBlock NowaitSource; - auto catchCopyPrivateNowaitClauses = [&](const auto &dir, bool isEnd) { - for (auto &clause : std::get<parser::OmpClauseList>(dir.t).v) { + auto catchCopyPrivateNowaitClauses = [&](const auto &dirSpec, bool isEnd) { + for (auto &clause : dirSpec.Clauses().v) { if (clause.Id() == llvm::omp::Clause::OMPC_copyprivate) { for (const auto &ompObject : GetOmpObjectList(clause)->v) { const auto *name{parser::Unwrap<parser::Name>(ompObject)}; @@ -881,9 +876,9 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { } } }; - catchCopyPrivateNowaitClauses(beginBlockDir, false); - if (endBlockDir) { - catchCopyPrivateNowaitClauses(*endBlockDir, true); + catchCopyPrivateNowaitClauses(beginSpec, false); + if (endSpec) { + catchCopyPrivateNowaitClauses(*endSpec, true); } unsigned version{context_.langOptions().OpenMPVersion}; if (version <= 52 && NowaitSource.ToString().size() && @@ -893,7 +888,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { } } - switch (beginDir.v) { + switch (beginSpec.DirId()) { case llvm::omp::Directive::OMPD_target: if (CheckTargetBlockOnlyTeams(block)) { EnterDirectiveNest(TargetBlockOnlyTeams); @@ -901,27 +896,25 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { break; case llvm::omp::OMPD_workshare: case llvm::omp::OMPD_parallel_workshare: - CheckWorkshareBlockStmts(block, beginDir.source); + CheckWorkshareBlockStmts(block, beginSpec.source); HasInvalidWorksharingNesting( - beginDir.source, llvm::omp::nestedWorkshareErrSet); + beginSpec.source, llvm::omp::nestedWorkshareErrSet); break; case llvm::omp::Directive::OMPD_scope: case llvm::omp::Directive::OMPD_single: // TODO: This check needs to be 
extended while implementing nesting of // regions checks. HasInvalidWorksharingNesting( - beginDir.source, llvm::omp::nestedWorkshareErrSet); + beginSpec.source, llvm::omp::nestedWorkshareErrSet); break; - case llvm::omp::Directive::OMPD_task: { - const auto &clauses{std::get<parser::OmpClauseList>(beginBlockDir.t)}; - for (const auto &clause : clauses.v) { + case llvm::omp::Directive::OMPD_task: + for (const auto &clause : beginSpec.Clauses().v) { if (std::get_if<parser::OmpClause::Untied>(&clause.u)) { OmpUnitedTaskDesignatorChecker check{context_}; parser::Walk(block, check); } } break; - } default: break; } @@ -934,7 +927,7 @@ void OmpStructureChecker::CheckMasterNesting( // TODO: Expand the check to include `LOOP` construct as well when it is // supported. if (IsCloselyNestedRegion(llvm::omp::nestedMasterErrSet)) { - context_.Say(parser::FindSourceLocation(x), + context_.Say(x.BeginDir().source, "`MASTER` region may not be closely nested inside of `WORKSHARING`, " "`LOOP`, `TASK`, `TASKLOOP`," " or `ATOMIC` region."_err_en_US); @@ -1034,7 +1027,7 @@ void OmpStructureChecker::ChecksOnOrderedAsBlock() { } } -void OmpStructureChecker::Leave(const parser::OmpBeginBlockDirective &) { +void OmpStructureChecker::Leave(const parser::OmpBeginDirective &) { switch (GetContext().directive) { case llvm::omp::Directive::OMPD_ordered: // [5.1] 2.19.9 Ordered Construct Restriction @@ -1601,7 +1594,7 @@ void OmpStructureChecker::Enter(const parser::OmpErrorDirective &x) { } void OmpStructureChecker::Enter(const parser::OpenMPDispatchConstruct &x) { - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; const auto &block{std::get<parser::Block>(x.t)}; PushContextAndClauseSets( dirSpec.DirName().source, llvm::omp::Directive::OMPD_dispatch); @@ -1672,7 +1665,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPExecutableAllocate &x) { void OmpStructureChecker::Enter(const 
parser::OpenMPAllocatorsConstruct &x) { isPredefinedAllocator = true; - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; auto &block{std::get<parser::Block>(x.t)}; PushContextAndClauseSets( dirSpec.DirName().source, llvm::omp::Directive::OMPD_allocators); @@ -1703,7 +1696,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPAllocatorsConstruct &x) { } void OmpStructureChecker::Leave(const parser::OpenMPAllocatorsConstruct &x) { - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; for (const auto &clause : dirSpec.Clauses().v) { if (const auto *allocClause{ @@ -1737,7 +1730,7 @@ void OmpStructureChecker::CheckBarrierNesting( // TODO: Expand the check to include `LOOP` construct as well when it is // supported. if (IsCloselyNestedRegion(llvm::omp::nestedBarrierErrSet)) { - context_.Say(parser::FindSourceLocation(x), + context_.Say(x.v.DirName().source, "`BARRIER` region may not be closely nested inside of `WORKSHARING`, " "`LOOP`, `TASK`, `TASKLOOP`," "`CRITICAL`, `ORDERED`, `ATOMIC` or `MASTER` region."_err_en_US); @@ -2277,22 +2270,21 @@ void OmpStructureChecker::CheckCancellationNest( } } -void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { - const auto &dir{std::get<parser::OmpBlockDirective>(x.t)}; - ResetPartialContext(dir.source); - switch (dir.v) { +void OmpStructureChecker::Enter(const parser::OmpEndDirective &x) { + parser::CharBlock source{x.DirName().source}; + ResetPartialContext(source); + switch (x.DirId()) { case llvm::omp::Directive::OMPD_scope: - PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_end_scope); + PushContextAndClauseSets(source, llvm::omp::Directive::OMPD_end_scope); break; // 2.7.3 end-single-clause -> copyprivate-clause | // nowait-clause case llvm::omp::Directive::OMPD_single: - PushContextAndClauseSets(dir.source, 
llvm::omp::Directive::OMPD_end_single); + PushContextAndClauseSets(source, llvm::omp::Directive::OMPD_end_single); break; // 2.7.4 end-workshare -> END WORKSHARE [nowait-clause] case llvm::omp::Directive::OMPD_workshare: - PushContextAndClauseSets( - dir.source, llvm::omp::Directive::OMPD_end_workshare); + PushContextAndClauseSets(source, llvm::omp::Directive::OMPD_end_workshare); break; default: // no clauses are allowed @@ -2305,7 +2297,7 @@ void OmpStructureChecker::Enter(const parser::OmpEndBlockDirective &x) { // constructs unless a nowait clause is specified. Only OMPD_end_single and // end_workshareare popped as they are pushed while entering the // EndBlockDirective. -void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) { +void OmpStructureChecker::Leave(const parser::OmpEndDirective &x) { if ((GetContext().directive == llvm::omp::Directive::OMPD_end_scope) || (GetContext().directive == llvm::omp::Directive::OMPD_end_single) || (GetContext().directive == llvm::omp::Directive::OMPD_end_workshare)) { @@ -4358,11 +4350,8 @@ bool OmpStructureChecker::CheckTargetBlockOnlyTeams( parser::Unwrap<parser::OpenMPConstruct>(*it)}) { if (const auto *ompBlockConstruct{ std::get_if<parser::OpenMPBlockConstruct>(&ompConstruct->u)}) { - const auto &beginBlockDir{ - std::get<parser::OmpBeginBlockDirective>(ompBlockConstruct->t)}; - const auto &beginDir{ - std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - if (beginDir.v == llvm::omp::Directive::OMPD_teams) { + llvm::omp::Directive dirId{ompBlockConstruct->BeginDir().DirId()}; + if (dirId == llvm::omp::Directive::OMPD_teams) { nestedTeams = true; } } @@ -4408,11 +4397,7 @@ void OmpStructureChecker::CheckWorkshareBlockStmts( auto currentDir{llvm::omp::Directive::OMPD_unknown}; if (const auto *ompBlockConstruct{ std::get_if<parser::OpenMPBlockConstruct>(&ompConstruct->u)}) { - const auto &beginBlockDir{ - std::get<parser::OmpBeginBlockDirective>(ompBlockConstruct->t)}; - const auto &beginDir{ - 
std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - currentDir = beginDir.v; + currentDir = ompBlockConstruct->BeginDir().DirId(); } else if (const auto *ompLoopConstruct{ std::get_if<parser::OpenMPLoopConstruct>( &ompConstruct->u)}) { diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index f4a291d..6b33ca6 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -90,9 +90,9 @@ public: void Leave(const parser::OpenMPDeclarativeAssumes &); void Enter(const parser::OpenMPBlockConstruct &); void Leave(const parser::OpenMPBlockConstruct &); - void Leave(const parser::OmpBeginBlockDirective &); - void Enter(const parser::OmpEndBlockDirective &); - void Leave(const parser::OmpEndBlockDirective &); + void Leave(const parser::OmpBeginDirective &); + void Enter(const parser::OmpEndDirective &); + void Leave(const parser::OmpEndDirective &); void Enter(const parser::OpenMPSectionsConstruct &); void Leave(const parser::OpenMPSectionsConstruct &); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index cb68369..bb28cfb 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -378,7 +378,7 @@ public: bool Pre(const parser::OpenMPBlockConstruct &); void Post(const parser::OpenMPBlockConstruct &); - void Post(const parser::OmpBeginBlockDirective &) { + void Post(const parser::OmpBeginDirective &x) { GetContext().withinConstruct = true; } @@ -519,6 +519,9 @@ public: bool Pre(const parser::OpenMPDeclarativeAllocate &); void Post(const parser::OpenMPDeclarativeAllocate &) { PopContext(); } + bool Pre(const parser::OpenMPAtomicConstruct &); + void Post(const parser::OpenMPAtomicConstruct &) { PopContext(); } + bool Pre(const parser::OpenMPDispatchConstruct &); void Post(const parser::OpenMPDispatchConstruct &) { PopContext(); } @@ -1698,9 +1701,9 @@ static std::string 
ScopeSourcePos(const Fortran::semantics::Scope &scope); #endif bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { - const auto &beginBlockDir{std::get<parser::OmpBeginBlockDirective>(x.t)}; - const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - switch (beginDir.v) { + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; + llvm::omp::Directive dirId{dirSpec.DirId()}; + switch (dirId) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_parallel_masked: case llvm::omp::Directive::OMPD_master: @@ -1718,15 +1721,15 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { case llvm::omp::Directive::OMPD_parallel_workshare: case llvm::omp::Directive::OMPD_target_teams: case llvm::omp::Directive::OMPD_target_parallel: - PushContext(beginDir.source, beginDir.v); + PushContext(dirSpec.source, dirId); break; default: // TODO others break; } - if (beginDir.v == llvm::omp::Directive::OMPD_master || - beginDir.v == llvm::omp::Directive::OMPD_parallel_master) - IssueNonConformanceWarning(beginDir.v, beginDir.source, 52); + if (dirId == llvm::omp::Directive::OMPD_master || + dirId == llvm::omp::Directive::OMPD_parallel_master) + IssueNonConformanceWarning(dirId, dirSpec.source, 52); ClearDataSharingAttributeObjects(); ClearPrivateDataSharingAttributeObjects(); ClearAllocateNames(); @@ -1734,9 +1737,9 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPBlockConstruct &x) { } void OmpAttributeVisitor::Post(const parser::OpenMPBlockConstruct &x) { - const auto &beginBlockDir{std::get<parser::OmpBeginBlockDirective>(x.t)}; - const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - switch (beginDir.v) { + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; + llvm::omp::Directive dirId{dirSpec.DirId()}; + switch (dirId) { case llvm::omp::Directive::OMPD_masked: case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_parallel_masked: @@ -2185,6 
+2188,11 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPDeclarativeAllocate &x) { return false; } +bool OmpAttributeVisitor::Pre(const parser::OpenMPAtomicConstruct &x) { + PushContext(x.source, llvm::omp::Directive::OMPD_atomic); + return true; +} + bool OmpAttributeVisitor::Pre(const parser::OpenMPDispatchConstruct &x) { PushContext(x.source, llvm::omp::Directive::OMPD_dispatch); return true; @@ -2202,7 +2210,7 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPExecutableAllocate &x) { } bool OmpAttributeVisitor::Pre(const parser::OpenMPAllocatorsConstruct &x) { - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; PushContext(x.source, dirSpec.DirId()); for (const auto &clause : dirSpec.Clauses().v) { @@ -2288,7 +2296,7 @@ void OmpAttributeVisitor::Post(const parser::OpenMPExecutableAllocate &x) { } void OmpAttributeVisitor::Post(const parser::OpenMPAllocatorsConstruct &x) { - auto &dirSpec{std::get<parser::OmpDirectiveSpecification>(x.t)}; + const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; auto &block{std::get<parser::Block>(x.t)}; omp::SourcedActionStmt action{omp::GetActionStmt(block)}; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 2611470..25b1370 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1484,18 +1484,18 @@ public: } bool Pre(const parser::OpenMPBlockConstruct &); void Post(const parser::OpenMPBlockConstruct &); - bool Pre(const parser::OmpBeginBlockDirective &x) { + bool Pre(const parser::OmpBeginDirective &x) { AddOmpSourceRange(x.source); return true; } - void Post(const parser::OmpBeginBlockDirective &) { + void Post(const parser::OmpBeginDirective &) { messageHandler().set_currStmtSource(std::nullopt); } - bool Pre(const parser::OmpEndBlockDirective &x) { + bool Pre(const parser::OmpEndDirective &x) { AddOmpSourceRange(x.source); return true; } - 
void Post(const parser::OmpEndBlockDirective &) { + void Post(const parser::OmpEndDirective &) { messageHandler().set_currStmtSource(std::nullopt); } @@ -1725,9 +1725,7 @@ private: }; bool OmpVisitor::NeedsScope(const parser::OpenMPBlockConstruct &x) { - const auto &beginBlockDir{std::get<parser::OmpBeginBlockDirective>(x.t)}; - const auto &beginDir{std::get<parser::OmpBlockDirective>(beginBlockDir.t)}; - switch (beginDir.v) { + switch (x.BeginDir().DirId()) { case llvm::omp::Directive::OMPD_master: case llvm::omp::Directive::OMPD_ordered: return false; diff --git a/flang/test/Lower/OpenMP/distribute-simd.f90 b/flang/test/Lower/OpenMP/distribute-simd.f90 index a436001..d0316d1 100644 --- a/flang/test/Lower/OpenMP/distribute-simd.f90 +++ b/flang/test/Lower/OpenMP/distribute-simd.f90 @@ -7,7 +7,7 @@ subroutine distribute_simd_aligned(A) use iso_c_binding type(c_ptr) :: A - + !$omp teams ! CHECK: omp.distribute @@ -57,3 +57,22 @@ subroutine distribute_simd_simdlen() !$omp end teams end subroutine distribute_simd_simdlen + +! CHECK-LABEL: func.func @_QPdistribute_simd_private( +subroutine distribute_simd_private() + integer, allocatable :: tmp + ! CHECK: omp.teams + !$omp teams + ! CHECK: omp.distribute + ! CHECK: omp.simd + ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]], @[[PRIV_IVAR_SYM:.*]] %{{.*}} -> %[[PRIV_IVAR:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<i32>) + ! CHECK-NEXT: omp.loop_nest (%[[IVAR:.*]]) : i32 + !$omp distribute simd private(tmp) + do index_ = 1, 10 + ! CHECK: %[[PRIV_BOX_DECL:.*]]:2 = hlfir.declare %[[PRIV_BOX]] + ! CHECK: %[[PRIV_IVAR_DECL:.*]]:2 = hlfir.declare %[[PRIV_IVAR]] + ! 
CHECK: hlfir.assign %[[IVAR]] to %[[PRIV_IVAR_DECL]]#0 + end do + !$omp end distribute simd + !$omp end teams +end subroutine distribute_simd_private diff --git a/flang/test/Lower/OpenMP/wsloop-simd.f90 b/flang/test/Lower/OpenMP/wsloop-simd.f90 index 49a9a52..d26e93d 100644 --- a/flang/test/Lower/OpenMP/wsloop-simd.f90 +++ b/flang/test/Lower/OpenMP/wsloop-simd.f90 @@ -7,7 +7,7 @@ subroutine do_simd_aligned(A) use iso_c_binding type(c_ptr) :: A - + ! CHECK: omp.wsloop ! CHECK-NOT: aligned({{.*}}) ! CHECK-SAME: { @@ -66,3 +66,22 @@ subroutine do_simd_reduction() end do !$omp end do simd end subroutine do_simd_reduction + +! CHECK-LABEL: func.func @_QPdo_simd_private( +subroutine do_simd_private() + integer, allocatable :: tmp + ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.simd + ! CHECK-SAME: private(@[[PRIV_BOX_SYM:.*]] %{{.*}} -> %[[PRIV_BOX:.*]], @[[PRIV_IVAR_SYM:.*]] %{{.*}} -> %[[PRIV_IVAR:.*]] : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<i32>) + ! CHECK-NEXT: omp.loop_nest (%[[IVAR:.*]]) : i32 + !$omp do simd private(tmp) + do i=1, 10 + ! CHECK: %[[PRIV_BOX_DECL:.*]]:2 = hlfir.declare %[[PRIV_BOX]] + ! CHECK: %[[PRIV_IVAR_DECL:.*]]:2 = hlfir.declare %[[PRIV_IVAR]] + ! CHECK: hlfir.assign %[[IVAR]] to %[[PRIV_IVAR_DECL]]#0 + ! CHECK: %[[PRIV_BOX_LOAD:.*]] = fir.load %[[PRIV_BOX_DECL]] + ! CHECK: hlfir.assign %{{.*}} to %[[PRIV_BOX_DECL]]#0 + ! CHECK: omp.yield + tmp = tmp + 1 + end do +end subroutine do_simd_private diff --git a/flang/test/Lower/parametrized-derived-types.f90 b/flang/test/Lower/parametrized-derived-types.f90 new file mode 100644 index 0000000..97a40c9 --- /dev/null +++ b/flang/test/Lower/parametrized-derived-types.f90 @@ -0,0 +1,19 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 +! 
XFAIL: * +program main + TYPE ty(k1,k2) + INTEGER ,KIND::k1,k2=5 + INTEGER::arr(k1:k2)=10 + CHARACTER(LEN=k2)::CHARACTER + END TYPE ty + TYPE,EXTENDS(ty)::ty1(k3) + INTEGER,KIND ::k3=4 + TYPE(ty(2,k3+1))::cmp_ty = ty(2,k3+1)(55,'HI') + END TYPE ty1 + TYPE ty2(l1, l2) + !ERROR: not yet implemented: parameterized derived types + INTEGER,LEN ::l1,l2 + TYPE(ty1(2,5)), ALLOCATABLE::ty1_cmp(:) + END TYPE ty2 + TYPE(ty2(4,8)) ::ty2_obj +end program main diff --git a/flang/test/Parser/OpenMP/affinity-clause.f90 b/flang/test/Parser/OpenMP/affinity-clause.f90 index 642af6a..0b96cae 100644 --- a/flang/test/Parser/OpenMP/affinity-clause.f90 +++ b/flang/test/Parser/OpenMP/affinity-clause.f90 @@ -15,8 +15,8 @@ end !UNPARSE: !$OMP END TASK !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = task +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = task !PARSE-TREE: | OmpClauseList -> OmpClause -> Affinity -> OmpAffinityClause !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' @@ -34,8 +34,8 @@ end !UNPARSE: !$OMP END TASK !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = task +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = task !PARSE-TREE: | OmpClauseList -> OmpClause -> Affinity -> OmpAffinityClause !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> ArrayElement !PARSE-TREE: | | | DataRef -> Name = 'x' @@ -60,8 +60,8 @@ end !UNPARSE: !$OMP END TASK !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = task +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = task !PARSE-TREE: | OmpClauseList -> OmpClause -> Affinity -> OmpAffinityClause !PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier 
!PARSE-TREE: | | | TypeDeclarationStmt diff --git a/flang/test/Parser/OpenMP/allocators-unparse.f90 b/flang/test/Parser/OpenMP/allocators-unparse.f90 index 70feb7a..079d6ac 100644 --- a/flang/test/Parser/OpenMP/allocators-unparse.f90 +++ b/flang/test/Parser/OpenMP/allocators-unparse.f90 @@ -28,7 +28,7 @@ end subroutine allocate !CHECK-NEXT: ALLOCATE(arr2(5,3)) !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | OmpBeginDirective !PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators !PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause !PARSE-TREE-NEXT: | | | Modifier -> OmpAllocatorSimpleModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' @@ -40,7 +40,7 @@ end subroutine allocate !PARSE-TREE-NEXT: | | | | AllocateObject -> Name = 'arr1' !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | OmpBeginDirective !PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators !PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause !PARSE-TREE-NEXT: | | | Modifier -> OmpAllocatorComplexModifier -> Scalar -> Integer -> Expr -> Designator -> DataRef -> Name = 'omp_default_mem_alloc' @@ -56,7 +56,7 @@ end subroutine allocate !PARSE-TREE-NEXT: | | | | AllocateObject -> Name = 'arr1' !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAllocatorsConstruct -!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | OmpBeginDirective !PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators !PARSE-TREE-NEXT: | | OmpClauseList -> OmpClause -> Allocate -> OmpAllocateClause !PARSE-TREE-NEXT: | | | Modifier -> OmpAlignModifier -> Scalar -> Integer -> 
Expr -> LiteralConstant -> IntLiteralConstant = '32' @@ -70,7 +70,7 @@ end subroutine allocate !PARSE-TREE-NEXT: | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '5' !PARSE-TREE-NEXT: | | | | AllocateShapeSpec !PARSE-TREE-NEXT: | | | | | Scalar -> Integer -> Expr -> LiteralConstant -> IntLiteralConstant = '3' -!PARSE-TREE-NEXT: | OmpDirectiveSpecification +!PARSE-TREE-NEXT: | OmpEndDirective !PARSE-TREE-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocators !PARSE-TREE-NEXT: | | OmpClauseList -> !PARSE-TREE-NEXT: | | Flags = None diff --git a/flang/test/Parser/OpenMP/atomic-compare.f90 b/flang/test/Parser/OpenMP/atomic-compare.f90 index e09da4a..9b9c4f0 100644 --- a/flang/test/Parser/OpenMP/atomic-compare.f90 +++ b/flang/test/Parser/OpenMP/atomic-compare.f90 @@ -16,7 +16,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> !PARSE-TREE: | | OmpClause -> Compare @@ -54,7 +54,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> !PARSE-TREE: | | OmpClause -> Compare @@ -108,7 +108,7 @@ end !PARSE-TREE: | | | Expr = 'a' !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'a' !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> 
!PARSE-TREE: | | OmpClause -> Compare @@ -145,7 +145,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> !PARSE-TREE: | | OmpClause -> Capture @@ -169,7 +169,7 @@ end !PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | Expr = 'b' !PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'b' -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | | Flags = None @@ -197,7 +197,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> !PARSE-TREE: | | OmpClause -> Capture @@ -224,7 +224,7 @@ end !PARSE-TREE: | | | | | Expr = 'b' !PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'b' !PARSE-TREE: | | | EndIfStmt -> -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | | Flags = None @@ -254,7 +254,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Update -> !PARSE-TREE: | | OmpClause -> Capture @@ -284,7 +284,7 @@ end !PARSE-TREE: | | | | | | Expr = 
'x' !PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | EndIfStmt -> -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | | Flags = None diff --git a/flang/test/Parser/OpenMP/atomic-end.f90 b/flang/test/Parser/OpenMP/atomic-end.f90 index e5eac87..b971bb6f 100644 --- a/flang/test/Parser/OpenMP/atomic-end.f90 +++ b/flang/test/Parser/OpenMP/atomic-end.f90 @@ -16,7 +16,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Read !PARSE-TREE: | | Flags = None @@ -26,7 +26,7 @@ end !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'v' !PARSE-TREE: | | | Expr = 'x' !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'x' -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | | Flags = None @@ -47,7 +47,7 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPAtomicConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> OmpClause -> Read !PARSE-TREE: | | Flags = None @@ -57,7 +57,7 @@ end !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'v' !PARSE-TREE: | | | Expr = 'x' !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'x' -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = atomic !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | 
| Flags = None diff --git a/flang/test/Parser/OpenMP/block-construct.f90 b/flang/test/Parser/OpenMP/block-construct.f90 index 83f0f7f..ea42554 100644 --- a/flang/test/Parser/OpenMP/block-construct.f90 +++ b/flang/test/Parser/OpenMP/block-construct.f90 @@ -20,8 +20,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpObject -> Designator -> DataRef -> Name = 'y' @@ -45,8 +45,8 @@ end !PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '2' !PARSE-TREE: | | | | | Expr = 'x' !PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'x' -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> @@ -72,8 +72,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpObject -> Designator -> DataRef -> Name = 'y' @@ -129,8 +129,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct 
-!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpObject -> Designator -> DataRef -> Name = 'y' @@ -160,6 +160,6 @@ end !PARSE-TREE: | | | | | | | Expr = 'x' !PARSE-TREE: | | | | | | | | Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | EndBlockStmt -> -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/construct-prefix-conflict.f90 b/flang/test/Parser/OpenMP/construct-prefix-conflict.f90 index 678942a..d6f5152 100644 --- a/flang/test/Parser/OpenMP/construct-prefix-conflict.f90 +++ b/flang/test/Parser/OpenMP/construct-prefix-conflict.f90 @@ -27,13 +27,13 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | | | OmpBeginBlockDirective -!PARSE-TREE: | | | | OmpBlockDirective -> llvm::omp::Directive = target data +!PARSE-TREE: | | | OmpBeginDirective +!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = target data !PARSE-TREE: | | | | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | | | | OmpObjectList -> OmpObject -> 
Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | | bool = 'true' @@ -43,11 +43,11 @@ end !PARSE-TREE: | | | | | Expr -> Add !PARSE-TREE: | | | | | | Expr -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | | | Expr -> LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: | | | OmpEndBlockDirective -!PARSE-TREE: | | | | OmpBlockDirective -> llvm::omp::Directive = target data +!PARSE-TREE: | | | OmpEndDirective +!PARSE-TREE: | | | | OmpDirectiveName -> llvm::omp::Directive = target data !PARSE-TREE: | | | | OmpClauseList -> -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> @@ -70,8 +70,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification @@ -85,8 +85,8 @@ end !PARSE-TREE: | | | Expr -> Add !PARSE-TREE: | | | | Expr -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | Expr -> LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> @@ -109,8 +109,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective 
-!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification @@ -124,8 +124,8 @@ end !PARSE-TREE: | | | Expr -> Add !PARSE-TREE: | | | | Expr -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | Expr -> LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> @@ -148,8 +148,8 @@ end !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPSimpleStandaloneConstruct -> OmpDirectiveSpecification @@ -163,6 +163,6 @@ end !PARSE-TREE: | | | Expr -> Add !PARSE-TREE: | | | | Expr -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | | Expr -> LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/defaultmap-clause.f90 b/flang/test/Parser/OpenMP/defaultmap-clause.f90 index d908258..e7825e5 100644 
--- a/flang/test/Parser/OpenMP/defaultmap-clause.f90 +++ b/flang/test/Parser/OpenMP/defaultmap-clause.f90 @@ -11,8 +11,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = From !PARSE-TREE: Block @@ -27,8 +27,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = Firstprivate !PARSE-TREE: | | Modifier -> OmpVariableCategory -> Value = Aggregate @@ -43,8 +43,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = Alloc !PARSE-TREE: | | Modifier -> OmpVariableCategory -> Value = All @@ -61,8 +61,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = Alloc !PARSE-TREE: | | Modifier -> OmpVariableCategory -> Value = Allocatable @@ -77,8 +77,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE 
-!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = Tofrom !PARSE-TREE: | | Modifier -> OmpVariableCategory -> Value = Scalar @@ -93,8 +93,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: | | ImplicitBehavior = Present !PARSE-TREE: | | Modifier -> OmpVariableCategory -> Value = Scalar diff --git a/flang/test/Parser/OpenMP/defaultmap-unparse.f90 b/flang/test/Parser/OpenMP/defaultmap-unparse.f90 index bbbb6fc..fa05780 100644 --- a/flang/test/Parser/OpenMP/defaultmap-unparse.f90 +++ b/flang/test/Parser/OpenMP/defaultmap-unparse.f90 @@ -34,8 +34,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Tofrom !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Scalar @@ -46,8 +46,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Alloc !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = 
Scalar @@ -58,8 +58,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = None @@ -69,8 +69,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = None !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Scalar @@ -81,8 +81,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = To !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Scalar @@ -93,8 +93,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Firstprivate !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Scalar @@ -108,8 +108,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> 
llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Tofrom !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Aggregate @@ -120,8 +120,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Tofrom !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Allocatable @@ -134,8 +134,8 @@ program main !CHECK: !$omp end target !$omp end target -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Defaultmap -> OmpDefaultmapClause !PARSE-TREE: ImplicitBehavior = Default !PARSE-TREE: Modifier -> OmpVariableCategory -> Value = Pointer diff --git a/flang/test/Parser/OpenMP/dispatch.f90 b/flang/test/Parser/OpenMP/dispatch.f90 index 4076c00..131b4d1 100644 --- a/flang/test/Parser/OpenMP/dispatch.f90 +++ b/flang/test/Parser/OpenMP/dispatch.f90 @@ -18,7 +18,7 @@ subroutine sub(x) !UNPARSE: !$OMP END DISPATCH !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch !PARSE-TREE: | | OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: | | | Scalar -> Integer -> Expr = '3_4' @@ -37,7 +37,7 @@ subroutine sub(x) !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt ![...] 
-!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpEndDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch !PARSE-TREE: | | OmpClauseList -> !PARSE-TREE: | | Flags = None @@ -51,7 +51,7 @@ subroutine sub(x) !UNPARSE: r=func(a+1_4,b+2_4,c+3_4) !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPDispatchConstruct -!PARSE-TREE: | OmpDirectiveSpecification +!PARSE-TREE: | OmpBeginDirective !PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = dispatch !PARSE-TREE: | | OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: | | | Scalar -> Integer -> Expr = '3_4' @@ -60,7 +60,7 @@ subroutine sub(x) !PARSE-TREE: | | Flags = None !PARSE-TREE: | Block !PARSE-TREE: | | ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> AssignmentStmt -!PARSE-TREE-NOT: OmpDirectiveSpecification +!PARSE-TREE-NOT: OmpEndDirective !$omp dispatch device(3) is_device_ptr(x) r = func(a+1, b+2, c+3) diff --git a/flang/test/Parser/OpenMP/if-clause.f90 b/flang/test/Parser/OpenMP/if-clause.f90 index e47fbde..2bf80cb 100644 --- a/flang/test/Parser/OpenMP/if-clause.f90 +++ b/flang/test/Parser/OpenMP/if-clause.f90 @@ -24,7 +24,7 @@ program openmp_parse_if ! CHECK-NEXT: OmpDirectiveName -> llvm::omp::Directive = target exit data !$omp target exit data map(from: i) if(target exit data: cond) - ! CHECK: OmpBlockDirective -> llvm::omp::Directive = target data + ! CHECK: OmpDirectiveName -> llvm::omp::Directive = target data ! CHECK: OmpClause -> If -> OmpIfClause ! CHECK-NEXT: OmpDirectiveName -> llvm::omp::Directive = target data !$omp target data map(tofrom: i) if(target data: cond) @@ -45,7 +45,7 @@ program openmp_parse_if end do !$omp end target teams distribute parallel do simd - ! CHECK: OmpBlockDirective -> llvm::omp::Directive = task + ! CHECK: OmpDirectiveName -> llvm::omp::Directive = task ! CHECK-NEXT: OmpClause -> If -> OmpIfClause ! 
CHECK-NEXT: OmpDirectiveName -> llvm::omp::Directive = task !$omp task if(task: cond) diff --git a/flang/test/Parser/OpenMP/in-reduction-clause.f90 b/flang/test/Parser/OpenMP/in-reduction-clause.f90 index bb3fadb..ee59069 100644 --- a/flang/test/Parser/OpenMP/in-reduction-clause.f90 +++ b/flang/test/Parser/OpenMP/in-reduction-clause.f90 @@ -29,13 +29,13 @@ subroutine omp_in_reduction_taskgroup() end subroutine omp_in_reduction_taskgroup !PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE-NEXT: OmpBeginBlockDirective -!PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = taskgroup +!PARSE-TREE-NEXT: OmpBeginDirective +!PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = taskgroup !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> TaskReduction -> OmpTaskReductionClause !PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE-NEXT: OmpBeginBlockDirective -!PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = task +!PARSE-TREE-NEXT: OmpBeginDirective +!PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = task !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> InReduction -> OmpInReductionClause !PARSE-TREE-NEXT: OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE-NEXT: OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'z' @@ -66,8 +66,8 @@ subroutine omp_in_reduction_parallel() end subroutine omp_in_reduction_parallel !PARSE-TREE: OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE-NEXT: OmpBeginBlockDirective -!PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel +!PARSE-TREE-NEXT: OmpBeginDirective +!PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = parallel !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Reduction -> OmpReductionClause !PARSE-TREE: OpenMPConstruct -> OpenMPLoopConstruct diff --git a/flang/test/Parser/OpenMP/map-modifiers-v60.f90 b/flang/test/Parser/OpenMP/map-modifiers-v60.f90 index bc80886..46d57a0 100644 --- 
a/flang/test/Parser/OpenMP/map-modifiers-v60.f90 +++ b/flang/test/Parser/OpenMP/map-modifiers-v60.f90 @@ -15,8 +15,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpAlwaysModifier -> Value = Always !PARSE-TREE: | | Modifier -> OmpCloseModifier -> Value = Close @@ -38,8 +38,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpSelfModifier -> Value = Self !PARSE-TREE: | | Modifier -> OmpMapType -> Value = Storage @@ -60,8 +60,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpRefModifier -> Value = Ref_Ptr !PARSE-TREE: | | Modifier -> OmpMapType -> Value = To @@ -82,8 +82,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpRefModifier -> Value = Ref_Ptee !PARSE-TREE: | | Modifier -> OmpMapType -> Value = To @@ -104,8 +104,8 @@ end !UNPARSE: !$OMP END TARGET 
!UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpRefModifier -> Value = Ref_Ptr_Ptee !PARSE-TREE: | | Modifier -> OmpMapType -> Value = To diff --git a/flang/test/Parser/OpenMP/map-modifiers.f90 b/flang/test/Parser/OpenMP/map-modifiers.f90 index 4e034e5..83662b7 100644 --- a/flang/test/Parser/OpenMP/map-modifiers.f90 +++ b/flang/test/Parser/OpenMP/map-modifiers.f90 @@ -15,8 +15,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always @@ -40,8 +40,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always @@ -64,8 +64,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapType -> Value = From 
!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' @@ -85,8 +85,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | bool = 'true' @@ -105,8 +105,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always @@ -130,8 +130,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Ompx_Hold !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Always @@ -155,8 +155,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present !PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier @@ -190,8 +190,8 @@ end 
!UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present !PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier @@ -225,8 +225,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present !PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier @@ -283,8 +283,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapTypeModifier -> Value = Present !PARSE-TREE: | | Modifier -> OmpIterator -> OmpIteratorSpecifier @@ -334,8 +334,8 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | Modifier -> OmpMapper -> Name = 'xx' !PARSE-TREE: | | Modifier -> OmpMapType -> Value = From @@ -355,7 +355,7 @@ end !UNPARSE: !$OMP END TARGET !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target 
+!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | Modifier -> OmpMapTypeModifier -> Value = Present !PARSE-TREE: | Modifier -> OmpIterator -> OmpIteratorSpecifier diff --git a/flang/test/Parser/OpenMP/masked-unparse.f90 b/flang/test/Parser/OpenMP/masked-unparse.f90 index 16d7ca6..46ddd37 100644 --- a/flang/test/Parser/OpenMP/masked-unparse.f90 +++ b/flang/test/Parser/OpenMP/masked-unparse.f90 @@ -6,14 +6,14 @@ subroutine test_masked() integer :: c = 1 - !PARSE-TREE: OmpBeginBlockDirective - !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = masked + !PARSE-TREE: OmpBeginDirective + !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = masked !CHECK: !$omp masked !$omp masked c = c + 1 !$omp end masked - !PARSE-TREE: OmpBeginBlockDirective - !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = masked + !PARSE-TREE: OmpBeginDirective + !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = masked !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Filter -> Scalar -> Integer -> Expr = '1_4' !PARSE-TREE-NEXT: LiteralConstant -> IntLiteralConstant = '1' !CHECK: !$omp masked filter(1_4) @@ -51,8 +51,8 @@ end subroutine subroutine test_parallel_masked integer, parameter :: i = 1, j = 1 integer :: c = 2 - !PARSE-TREE: OmpBeginBlockDirective - !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel masked + !PARSE-TREE: OmpBeginDirective + !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = parallel masked !PARSE-TREE-NEXT: OmpClauseList -> OmpClause -> Filter -> Scalar -> Integer -> Expr = '2_4' !PARSE-TREE-NEXT: Add !PARSE-TREE-NEXT: Expr = '1_4' diff --git a/flang/test/Parser/OpenMP/master-unparse.f90 b/flang/test/Parser/OpenMP/master-unparse.f90 index 30c293a..ec7a7d3 100644 --- a/flang/test/Parser/OpenMP/master-unparse.f90 +++ b/flang/test/Parser/OpenMP/master-unparse.f90 @@ -6,8 +6,8 @@ subroutine test_master() integer :: c 
= 1 - !PARSE-TREE: OmpBeginBlockDirective - !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = master + !PARSE-TREE: OmpBeginDirective + !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = master !CHECK: !$omp master !$omp master c = c + 1 @@ -40,8 +40,8 @@ end subroutine subroutine test_parallel_master integer :: c = 2 - !PARSE-TREE: OmpBeginBlockDirective - !PARSE-TREE-NEXT: OmpBlockDirective -> llvm::omp::Directive = parallel master + !PARSE-TREE: OmpBeginDirective + !PARSE-TREE-NEXT: OmpDirectiveName -> llvm::omp::Directive = parallel master !CHECK: !$omp parallel master !$omp parallel master c = c + 2 diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 index 6a1d565..69a0de6 100644 --- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 +++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 @@ -170,14 +170,14 @@ end !UNPARSE: SUBROUTINE f06 !UNPARSE: IMPLICIT NONE !UNPARSE: INTEGER i -!UNPARSE: !$OMP TARGET DATA MAP(TOFROM: i) +!UNPARSE: !$OMP TARGET_DATA MAP(TOFROM: i) !UNPARSE: i=0_4 -!UNPARSE: !$OMP END TARGET DATA +!UNPARSE: !$OMP END TARGET_DATA !UNPARSE: END SUBROUTINE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: | OmpBeginBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> llvm::omp::Directive = target data +!PARSE-TREE: | OmpBeginDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target data !PARSE-TREE: | | OmpClauseList -> OmpClause -> Map -> OmpMapClause !PARSE-TREE: | | | Modifier -> OmpMapType -> Value = Tofrom !PARSE-TREE: | | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i' @@ -188,8 +188,8 @@ end !PARSE-TREE: | | | | Designator -> DataRef -> Name = 'i' !PARSE-TREE: | | | Expr = '0_4' !PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0' -!PARSE-TREE: | OmpEndBlockDirective -!PARSE-TREE: | | OmpBlockDirective -> 
llvm::omp::Directive = target data +!PARSE-TREE: | OmpEndDirective +!PARSE-TREE: | | OmpDirectiveName -> llvm::omp::Directive = target data !PARSE-TREE: | | OmpClauseList -> subroutine f07 diff --git a/flang/test/Parser/OpenMP/proc-bind.f90 b/flang/test/Parser/OpenMP/proc-bind.f90 index 3115b37..98ce39e 100644 --- a/flang/test/Parser/OpenMP/proc-bind.f90 +++ b/flang/test/Parser/OpenMP/proc-bind.f90 @@ -4,8 +4,8 @@ ! CHECK: !$OMP PARALLEL PROC_BIND(PRIMARY) ! PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -! PARSE-TREE: OmpBeginBlockDirective -! PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = parallel +! PARSE-TREE: OmpBeginDirective +! PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = parallel ! PARSE-TREE: OmpClauseList -> OmpClause -> ProcBind -> OmpProcBindClause -> AffinityPolicy = Primary subroutine sb1 !$omp parallel proc_bind(primary) diff --git a/flang/test/Parser/OpenMP/scope.f90 b/flang/test/Parser/OpenMP/scope.f90 index 6574136..9e046d6 100644 --- a/flang/test/Parser/OpenMP/scope.f90 +++ b/flang/test/Parser/OpenMP/scope.f90 @@ -9,13 +9,13 @@ program omp_scope !CHECK: !$OMP END SCOPE !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPBlockConstruct -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = scope !PARSE-TREE: OmpClauseList -> OmpClause -> Private -> OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'i' !PARSE-TREE: Block !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> ActionStmt -> PrintStmt -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = scope +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = scope !PARSE-TREE: OmpClauseList -> OmpClause -> Nowait !$omp scope private(i) diff --git 
a/flang/test/Parser/OpenMP/target_device_parse.f90 b/flang/test/Parser/OpenMP/target_device_parse.f90 index 7f5bee3..d3c9c69 100644 --- a/flang/test/Parser/OpenMP/target_device_parse.f90 +++ b/flang/test/Parser/OpenMP/target_device_parse.f90 @@ -20,13 +20,13 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: Scalar -> Integer -> Expr = '1_4' !PARSE-TREE: LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> !------------------------------------------------------ @@ -38,8 +38,8 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: Scalar -> Integer -> Expr = '1_4' !PARSE-TREE: Subtract @@ -47,8 +47,8 @@ PROGRAM main !PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2' !PARSE-TREE: Expr = '1_4' !PARSE-TREE: LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> @@ -61,13 +61,13 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: 
OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: Scalar -> Integer -> Expr = 'x' !PARSE-TREE: Designator -> DataRef -> Name = 'x' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> @@ -80,8 +80,8 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: Scalar -> Integer -> Expr = 'x+y' !PARSE-TREE: Add @@ -89,8 +89,8 @@ PROGRAM main !PARSE-TREE: Designator -> DataRef -> Name = 'x' !PARSE-TREE: Expr = 'y' !PARSE-TREE: Designator -> DataRef -> Name = 'y' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> !------------------------------------------------------ @@ -102,14 +102,14 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: OmpDeviceModifier -> Value = Ancestor !PARSE-TREE: Scalar -> Integer -> Expr = '1_4' !PARSE-TREE: LiteralConstant -> IntLiteralConstant = '1' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> @@ 
-122,14 +122,14 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: OmpDeviceModifier -> Value = Device_Num !PARSE-TREE: Scalar -> Integer -> Expr = '2_4' !PARSE-TREE: LiteralConstant -> IntLiteralConstant = '2' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> @@ -142,8 +142,8 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: OmpDeviceModifier -> Value = Ancestor !PARSE-TREE: Scalar -> Integer -> Expr = 'x+y' @@ -152,8 +152,8 @@ PROGRAM main !PARSE-TREE: Designator -> DataRef -> Name = 'x' !PARSE-TREE: Expr = 'y' !PARSE-TREE: Designator -> DataRef -> Name = 'y' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> @@ -166,8 +166,8 @@ PROGRAM main !CHECK: !$OMP END TARGET !$OMP END TARGET -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target !PARSE-TREE: OmpClauseList -> OmpClause -> Device -> OmpDeviceClause !PARSE-TREE: OmpDeviceModifier -> Value = Device_Num !PARSE-TREE: Scalar -> Integer -> Expr = 'x-y' @@ -176,6 +176,6 
@@ PROGRAM main !PARSE-TREE: Designator -> DataRef -> Name = 'x' !PARSE-TREE: Expr = 'y' !PARSE-TREE: Designator -> DataRef -> Name = 'y' -!PARSE-TREE: OmpEndBlockDirective -!PARSE-TREE: OmpBlockDirective -> llvm::omp::Directive = target +!PARSE-TREE: OmpEndDirective +!PARSE-TREE: OmpDirectiveName -> llvm::omp::Directive = target END PROGRAM diff --git a/flang/test/Parser/OpenMP/task-reduction-clause.f90 b/flang/test/Parser/OpenMP/task-reduction-clause.f90 index 248ff791..e3e6962 100644 --- a/flang/test/Parser/OpenMP/task-reduction-clause.f90 +++ b/flang/test/Parser/OpenMP/task-reduction-clause.f90 @@ -15,8 +15,8 @@ end !UNPARSE: !$OMP END TASKGROUP !UNPARSE: END SUBROUTINE -!PARSE-TREE: OmpBeginBlockDirective -!PARSE-TREE: | OmpBlockDirective -> llvm::omp::Directive = taskgroup +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = taskgroup !PARSE-TREE: | OmpClauseList -> OmpClause -> TaskReduction -> OmpTaskReductionClause !PARSE-TREE: | | Modifier -> OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' diff --git a/flang/test/Parser/OpenMP/task.f90 b/flang/test/Parser/OpenMP/task.f90 index 706deb3..d6508df 100644 --- a/flang/test/Parser/OpenMP/task.f90 +++ b/flang/test/Parser/OpenMP/task.f90 @@ -2,7 +2,7 @@ ! RUN: %flang_fc1 %openmp_flags -fdebug-dump-parse-tree -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case %s ! 
RUN: %flang_fc1 %openmp_flags -fdebug-unparse -fopenmp -fopenmp-version=50 %s | FileCheck --ignore-case --check-prefix="CHECK-UNPARSE" %s -!CHECK: OmpBlockDirective -> llvm::omp::Directive = task +!CHECK: OmpDirectiveName -> llvm::omp::Directive = task !CHECK: OmpClauseList -> OmpClause -> Detach -> OmpDetachClause -> OmpObject -> Designator -> DataRef -> Name = 'event' !CHECK-UNPARSE: INTEGER(KIND=8_4) event diff --git a/flang/test/Semantics/OpenMP/atomic04.f90 b/flang/test/Semantics/OpenMP/atomic04.f90 index fb87ca5..8f8af31 100644 --- a/flang/test/Semantics/OpenMP/atomic04.f90 +++ b/flang/test/Semantics/OpenMP/atomic04.f90 @@ -180,7 +180,7 @@ subroutine more_invalid_atomic_update_stmts() x = x !$omp atomic update - !ERROR: The atomic variable x should appear as an argument in the update operation + !ERROR: This is not a valid ATOMIC UPDATE operation x = 1 !$omp atomic update diff --git a/flang/test/Semantics/OpenMP/atomic05.f90 b/flang/test/Semantics/OpenMP/atomic05.f90 index 77ffc6e..e0103be 100644 --- a/flang/test/Semantics/OpenMP/atomic05.f90 +++ b/flang/test/Semantics/OpenMP/atomic05.f90 @@ -19,7 +19,7 @@ program OmpAtomic x = 2 * 4 !ERROR: At most one clause from the 'memory-order' group is allowed on ATOMIC construct !$omp atomic update release, seq_cst - !ERROR: The atomic variable x should appear as an argument in the update operation + !ERROR: This is not a valid ATOMIC UPDATE operation x = 10 !ERROR: At most one clause from the 'memory-order' group is allowed on ATOMIC construct !$omp atomic capture release, seq_cst diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index bc13718..e725e26 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -163,8 +163,7 @@ use omp_lib !$omp parallel do i = 1, N enddo - !ERROR: Unmatched END TARGET directive - !$omp end target + !$omp end parallel ! 
OMP 5.0 - 2.6 Restriction point 1 outofparallel: do k =1, 10 diff --git a/flang/test/Semantics/OpenMP/if-clause-45.f90 b/flang/test/Semantics/OpenMP/if-clause-45.f90 new file mode 100644 index 0000000..b0b0a243 --- /dev/null +++ b/flang/test/Semantics/OpenMP/if-clause-45.f90 @@ -0,0 +1,675 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45 +! Check OpenMP 'if' clause validity for all directives that can have it with OpenMP 45 + +program main + integer :: i + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp teams + !$omp distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !$omp distribute parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !ERROR: TARGET is not a constituent of the DISTRIBUTE PARALLEL DO directive + !$omp distribute parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive + !$omp distribute parallel do if(.true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end distribute parallel do + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + !$omp distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) 
+ do i = 1, 10 + end do + !$omp end distribute parallel do simd + + !ERROR: TARGET is not a constituent of the DISTRIBUTE PARALLEL DO SIMD directive + !$omp distribute parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !$omp distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp distribute simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: TARGET is not a constituent of the DISTRIBUTE SIMD directive + !$omp distribute simd if(target: .true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp distribute simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end distribute simd + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DO SIMD + ! 
---------------------------------------------------------------------------- + !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 + !$omp do simd if(.true.) + do i = 1, 10 + end do + !$omp end do simd + + !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp do simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end do simd + + !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: TARGET is not a constituent of the DO SIMD directive + !$omp do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end do simd + + !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: At most one IF clause can appear on the DO SIMD directive + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp do simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end do simd + + ! ---------------------------------------------------------------------------- + ! PARALLEL + ! ---------------------------------------------------------------------------- + !$omp parallel if(.true.) + !$omp end parallel + + !$omp parallel if(parallel: .true.) + !$omp end parallel + + !ERROR: TARGET is not a constituent of the PARALLEL directive + !$omp parallel if(target: .true.) + !$omp end parallel + + !ERROR: At most one IF clause can appear on the PARALLEL directive + !$omp parallel if(.true.) if(parallel: .false.) + !$omp end parallel + + ! ---------------------------------------------------------------------------- + ! PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp parallel do if(.true.) 
+ do i = 1, 10 + end do + !$omp end parallel do + + !$omp parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end parallel do + + !ERROR: TARGET is not a constituent of the PARALLEL DO directive + !$omp parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end parallel do + + !ERROR: At most one IF clause can appear on the PARALLEL DO directive + !$omp parallel do if(.true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end parallel do + + ! ---------------------------------------------------------------------------- + ! PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end parallel do simd + + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp parallel do simd if(parallel: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end parallel do simd + + !ERROR: TARGET is not a constituent of the PARALLEL DO SIMD directive + !$omp parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end parallel do simd + + ! ---------------------------------------------------------------------------- + ! PARALLEL SECTIONS + ! ---------------------------------------------------------------------------- + !$omp parallel sections if(.true.) + !$omp end parallel sections + + !$omp parallel sections if(parallel: .true.) + !$omp end parallel sections + + !ERROR: TARGET is not a constituent of the PARALLEL SECTIONS directive + !$omp parallel sections if(target: .true.) + !$omp end parallel sections + + !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive + !$omp parallel sections if(.true.) if(parallel: .false.) + !$omp end parallel sections + + ! ---------------------------------------------------------------------------- + ! PARALLEL WORKSHARE + ! 
---------------------------------------------------------------------------- + !$omp parallel workshare if(.true.) + !$omp end parallel workshare + + !$omp parallel workshare if(parallel: .true.) + !$omp end parallel workshare + + !ERROR: TARGET is not a constituent of the PARALLEL WORKSHARE directive + !$omp parallel workshare if(target: .true.) + !$omp end parallel workshare + + !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive + !$omp parallel workshare if(.true.) if(parallel: .false.) + !$omp end parallel workshare + + ! ---------------------------------------------------------------------------- + ! SIMD + ! ---------------------------------------------------------------------------- + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !$omp simd if(.true.) + do i = 1, 10 + end do + !$omp end simd + + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end simd + + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: TARGET is not a constituent of the SIMD directive + !$omp simd if(target: .true.) + do i = 1, 10 + end do + !$omp end simd + + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: At most one IF clause can appear on the SIMD directive + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end simd + + ! ---------------------------------------------------------------------------- + ! TARGET + ! 
---------------------------------------------------------------------------- + !$omp target if(.true.) + !$omp end target + + !$omp target if(target: .true.) + !$omp end target + + !ERROR: PARALLEL is not a constituent of the TARGET directive + !$omp target if(parallel: .true.) + !$omp end target + + !ERROR: At most one IF clause can appear on the TARGET directive + !$omp target if(.true.) if(target: .false.) + !$omp end target + + ! ---------------------------------------------------------------------------- + ! TARGET DATA + ! ---------------------------------------------------------------------------- + !$omp target data map(tofrom: i) if(.true.) + !$omp end target data + + !$omp target data map(tofrom: i) if(target data: .true.) + !$omp end target data + + !ERROR: TARGET is not a constituent of the TARGET DATA directive + !$omp target data map(tofrom: i) if(target: .true.) + !$omp end target data + + !ERROR: At most one IF clause can appear on the TARGET DATA directive + !$omp target data map(tofrom: i) if(.true.) if(target data: .false.) + !$omp end target data + + ! ---------------------------------------------------------------------------- + ! TARGET ENTER DATA + ! ---------------------------------------------------------------------------- + !$omp target enter data map(to: i) if(.true.) + + !$omp target enter data map(to: i) if(target enter data: .true.) + + !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive + !$omp target enter data map(to: i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.) + + ! ---------------------------------------------------------------------------- + ! TARGET EXIT DATA + ! ---------------------------------------------------------------------------- + !$omp target exit data map(from: i) if(.true.) + + !$omp target exit data map(from: i) if(target exit data: .true.) 
+ + !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive + !$omp target exit data map(from: i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive + !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.) + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL + ! ---------------------------------------------------------------------------- + !$omp target parallel if(.true.) + !$omp end target parallel + + !$omp target parallel if(target: .true.) if(parallel: .false.) + !$omp end target parallel + + !ERROR: SIMD is not a constituent of the TARGET PARALLEL directive + !$omp target parallel if(simd: .true.) + !$omp end target parallel + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp target parallel do if(.true.) + do i = 1, 10 + end do + !$omp end target parallel do + + !$omp target parallel do if(target: .true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end target parallel do + + !ERROR: SIMD is not a constituent of the TARGET PARALLEL DO directive + !$omp target parallel do if(simd: .true.) + do i = 1, 10 + end do + !$omp end target parallel do + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp target parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end target parallel do simd + + !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp& if(simd: .true.) 
+ do i = 1, 10 + end do + !$omp end target parallel do simd + + !ERROR: TEAMS is not a constituent of the TARGET PARALLEL DO SIMD directive + !$omp target parallel do simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end target parallel do simd + + ! ---------------------------------------------------------------------------- + ! TARGET SIMD + ! ---------------------------------------------------------------------------- + !$omp target simd if(.true.) + do i = 1, 10 + end do + !$omp end target simd + + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp target simd if(target: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end target simd + + !ERROR: PARALLEL is not a constituent of the TARGET SIMD directive + !$omp target simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS + ! ---------------------------------------------------------------------------- + !$omp target teams if(.true.) + !$omp end target teams + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp target teams if(target: .true.) if(teams: .false.) + !$omp end target teams + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS directive + !$omp target teams if(parallel: .true.) + !$omp end target teams + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE + ! ---------------------------------------------------------------------------- + !$omp target teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp target teams distribute if(target: .true.) if(teams: .false.) 
+ do i = 1, 10 + end do + !$omp end target teams distribute + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS DISTRIBUTE directive + !$omp target teams distribute if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp target teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + !$omp target teams distribute parallel do & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp& if(target: .true.) if(teams: .false.) if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + !ERROR: SIMD is not a constituent of the TARGET TEAMS DISTRIBUTE PARALLEL DO directive + !$omp target teams distribute parallel do if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp target teams distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + !$omp target teams distribute parallel do simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp& if(target: .true.) if(teams: .false.) if(parallel: .true.) & + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp& if(simd: .false.) 
+ do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + !ERROR: TASK is not a constituent of the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive + !$omp target teams distribute parallel do simd if(task: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp target teams distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + !$omp target teams distribute simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp& if(target: .true.) if(teams: .false.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS DISTRIBUTE SIMD directive + !$omp target teams distribute simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! ---------------------------------------------------------------------------- + ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + !$omp target update to(i) if(.true.) + + !$omp target update to(i) if(target update: .true.) + + !ERROR: TARGET is not a constituent of the TARGET UPDATE directive + !$omp target update to(i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !$omp target update to(i) if(.true.) if(target update: .false.) + + ! ---------------------------------------------------------------------------- + ! TASK + ! ---------------------------------------------------------------------------- + !$omp task if(.true.) + !$omp end task + + !$omp task if(task: .true.) 
+ !$omp end task + + !ERROR: TARGET is not a constituent of the TASK directive + !$omp task if(target: .true.) + !$omp end task + + !ERROR: At most one IF clause can appear on the TASK directive + !$omp task if(.true.) if(task: .false.) + !$omp end task + + ! ---------------------------------------------------------------------------- + ! TASKLOOP + ! ---------------------------------------------------------------------------- + !$omp taskloop if(.true.) + do i = 1, 10 + end do + !$omp end taskloop + + !$omp taskloop if(taskloop: .true.) + do i = 1, 10 + end do + !$omp end taskloop + + !ERROR: TARGET is not a constituent of the TASKLOOP directive + !$omp taskloop if(target: .true.) + do i = 1, 10 + end do + !$omp end taskloop + + !ERROR: At most one IF clause can appear on the TASKLOOP directive + !$omp taskloop if(.true.) if(taskloop: .false.) + do i = 1, 10 + end do + !$omp end taskloop + + ! ---------------------------------------------------------------------------- + ! TASKLOOP SIMD + ! ---------------------------------------------------------------------------- + !$omp taskloop simd if(.true.) + do i = 1, 10 + end do + !$omp end taskloop simd + + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp taskloop simd if(taskloop: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end taskloop simd + + !ERROR: TARGET is not a constituent of the TASKLOOP SIMD directive + !$omp taskloop simd if(target: .true.) + do i = 1, 10 + end do + !$omp end taskloop simd + + ! ---------------------------------------------------------------------------- + ! TEAMS + ! ---------------------------------------------------------------------------- + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams if(.true.) 
+ !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams if(teams: .true.) + !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: TARGET is not a constituent of the TEAMS directive + !$omp teams if(target: .true.) + !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: At most one IF clause can appear on the TEAMS directive + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams if(.true.) if(teams: .false.) + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE + ! ---------------------------------------------------------------------------- + !$omp teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams distribute if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE directive + !$omp teams distribute if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams distribute if(.true.) if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO + ! 
---------------------------------------------------------------------------- + !$omp teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE PARALLEL DO directive + !$omp teams distribute parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp teams distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + !$omp teams distribute parallel do simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp& if(teams: .true.) if(parallel: .true.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE PARALLEL DO SIMD directive + !$omp teams distribute parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !ERROR: IF clause is not allowed on directive TEAMS DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !$omp teams distribute simd if(.true.) 
+ do i = 1, 10 + end do + !$omp end teams distribute simd + + !ERROR: IF clause is not allowed on directive TEAMS DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 + !ERROR: IF clause is not allowed on directive TEAMS DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 + !$omp teams distribute simd if(teams: .true.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + !ERROR: IF clause is not allowed on directive TEAMS DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE SIMD directive + !$omp teams distribute simd if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute simd +end program main diff --git a/flang/test/Semantics/OpenMP/if-clause-50.f90 b/flang/test/Semantics/OpenMP/if-clause-50.f90 new file mode 100644 index 0000000..eb59776f --- /dev/null +++ b/flang/test/Semantics/OpenMP/if-clause-50.f90 @@ -0,0 +1,641 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 +! Check OpenMP 'if' clause validity for all directives that can have it + +program main + integer :: i + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp teams + !$omp distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !$omp distribute parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do + + !ERROR: TARGET is not a constituent of the DISTRIBUTE PARALLEL DO directive + !$omp distribute parallel do if(target: .true.) 
+ do i = 1, 10 + end do + !$omp end distribute parallel do + + !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive + !$omp distribute parallel do if(.true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end distribute parallel do + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + !$omp distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + !$omp distribute parallel do simd if(parallel: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + + !ERROR: TARGET is not a constituent of the DISTRIBUTE PARALLEL DO SIMD directive + !$omp distribute parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end distribute parallel do simd + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp teams + !$omp distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !$omp distribute simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !ERROR: TARGET is not a constituent of the DISTRIBUTE SIMD directive + !$omp distribute simd if(target: .true.) + do i = 1, 10 + end do + !$omp end distribute simd + + !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive + !$omp distribute simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end distribute simd + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! DO SIMD + ! ---------------------------------------------------------------------------- + !$omp do simd if(.true.) + do i = 1, 10 + end do + !$omp end do simd + + !$omp do simd if(simd: .true.) 
+ do i = 1, 10 + end do + !$omp end do simd + + !ERROR: TARGET is not a constituent of the DO SIMD directive + !$omp do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end do simd + + !ERROR: At most one IF clause can appear on the DO SIMD directive + !$omp do simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end do simd + + ! ---------------------------------------------------------------------------- + ! PARALLEL + ! ---------------------------------------------------------------------------- + !$omp parallel if(.true.) + !$omp end parallel + + !$omp parallel if(parallel: .true.) + !$omp end parallel + + !ERROR: TARGET is not a constituent of the PARALLEL directive + !$omp parallel if(target: .true.) + !$omp end parallel + + !ERROR: At most one IF clause can appear on the PARALLEL directive + !$omp parallel if(.true.) if(parallel: .false.) + !$omp end parallel + + ! ---------------------------------------------------------------------------- + ! PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp parallel do if(.true.) + do i = 1, 10 + end do + !$omp end parallel do + + !$omp parallel do if(parallel: .true.) + do i = 1, 10 + end do + !$omp end parallel do + + !ERROR: TARGET is not a constituent of the PARALLEL DO directive + !$omp parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end parallel do + + !ERROR: At most one IF clause can appear on the PARALLEL DO directive + !$omp parallel do if(.true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end parallel do + + ! ---------------------------------------------------------------------------- + ! PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end parallel do simd + + !$omp parallel do simd if(parallel: .true.) if(simd: .false.) 
+ do i = 1, 10 + end do + !$omp end parallel do simd + + !ERROR: TARGET is not a constituent of the PARALLEL DO SIMD directive + !$omp parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end parallel do simd + + ! ---------------------------------------------------------------------------- + ! PARALLEL SECTIONS + ! ---------------------------------------------------------------------------- + !$omp parallel sections if(.true.) + !$omp end parallel sections + + !$omp parallel sections if(parallel: .true.) + !$omp end parallel sections + + !ERROR: TARGET is not a constituent of the PARALLEL SECTIONS directive + !$omp parallel sections if(target: .true.) + !$omp end parallel sections + + !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive + !$omp parallel sections if(.true.) if(parallel: .false.) + !$omp end parallel sections + + ! ---------------------------------------------------------------------------- + ! PARALLEL WORKSHARE + ! ---------------------------------------------------------------------------- + !$omp parallel workshare if(.true.) + !$omp end parallel workshare + + !$omp parallel workshare if(parallel: .true.) + !$omp end parallel workshare + + !ERROR: TARGET is not a constituent of the PARALLEL WORKSHARE directive + !$omp parallel workshare if(target: .true.) + !$omp end parallel workshare + + !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive + !$omp parallel workshare if(.true.) if(parallel: .false.) + !$omp end parallel workshare + + ! ---------------------------------------------------------------------------- + ! SIMD + ! ---------------------------------------------------------------------------- + !$omp simd if(.true.) + do i = 1, 10 + end do + !$omp end simd + + !$omp simd if(simd: .true.) + do i = 1, 10 + end do + !$omp end simd + + !ERROR: TARGET is not a constituent of the SIMD directive + !$omp simd if(target: .true.) 
+ do i = 1, 10 + end do + !$omp end simd + + !ERROR: At most one IF clause can appear on the SIMD directive + !$omp simd if(.true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end simd + + ! ---------------------------------------------------------------------------- + ! TARGET + ! ---------------------------------------------------------------------------- + !$omp target if(.true.) + !$omp end target + + !$omp target if(target: .true.) + !$omp end target + + !ERROR: PARALLEL is not a constituent of the TARGET directive + !$omp target if(parallel: .true.) + !$omp end target + + !ERROR: At most one IF clause can appear on the TARGET directive + !$omp target if(.true.) if(target: .false.) + !$omp end target + + ! ---------------------------------------------------------------------------- + ! TARGET DATA + ! ---------------------------------------------------------------------------- + !$omp target data map(tofrom: i) if(.true.) + !$omp end target data + + !$omp target data map(tofrom: i) if(target data: .true.) + !$omp end target data + + !ERROR: TARGET is not a constituent of the TARGET DATA directive + !$omp target data map(tofrom: i) if(target: .true.) + !$omp end target data + + !ERROR: At most one IF clause can appear on the TARGET DATA directive + !$omp target data map(tofrom: i) if(.true.) if(target data: .false.) + !$omp end target data + + ! ---------------------------------------------------------------------------- + ! TARGET ENTER DATA + ! ---------------------------------------------------------------------------- + !$omp target enter data map(to: i) if(.true.) + + !$omp target enter data map(to: i) if(target enter data: .true.) + + !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive + !$omp target enter data map(to: i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.) + + ! 
---------------------------------------------------------------------------- + ! TARGET EXIT DATA + ! ---------------------------------------------------------------------------- + !$omp target exit data map(from: i) if(.true.) + + !$omp target exit data map(from: i) if(target exit data: .true.) + + !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive + !$omp target exit data map(from: i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive + !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.) + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL + ! ---------------------------------------------------------------------------- + !$omp target parallel if(.true.) + !$omp end target parallel + + !$omp target parallel if(target: .true.) if(parallel: .false.) + !$omp end target parallel + + !ERROR: SIMD is not a constituent of the TARGET PARALLEL directive + !$omp target parallel if(simd: .true.) + !$omp end target parallel + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp target parallel do if(.true.) + do i = 1, 10 + end do + !$omp end target parallel do + + !$omp target parallel do if(target: .true.) if(parallel: .false.) + do i = 1, 10 + end do + !$omp end target parallel do + + !ERROR: SIMD is not a constituent of the TARGET PARALLEL DO directive + !$omp target parallel do if(simd: .true.) + do i = 1, 10 + end do + !$omp end target parallel do + + ! ---------------------------------------------------------------------------- + ! TARGET PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp target parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end target parallel do simd + + !$omp target parallel do simd if(target: .true.) 
if(parallel: .false.) & + !$omp& if(simd: .true.) + do i = 1, 10 + end do + !$omp end target parallel do simd + + !ERROR: TEAMS is not a constituent of the TARGET PARALLEL DO SIMD directive + !$omp target parallel do simd if(teams: .true.) + do i = 1, 10 + end do + !$omp end target parallel do simd + + ! ---------------------------------------------------------------------------- + ! TARGET SIMD + ! ---------------------------------------------------------------------------- + !$omp target simd if(.true.) + do i = 1, 10 + end do + !$omp end target simd + + !$omp target simd if(target: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end target simd + + !ERROR: PARALLEL is not a constituent of the TARGET SIMD directive + !$omp target simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS + ! ---------------------------------------------------------------------------- + !$omp target teams if(.true.) + !$omp end target teams + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp target teams if(target: .true.) if(teams: .false.) + !$omp end target teams + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS directive + !$omp target teams if(parallel: .true.) + !$omp end target teams + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE + ! ---------------------------------------------------------------------------- + !$omp target teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp target teams distribute if(target: .true.) if(teams: .false.) 
+ do i = 1, 10 + end do + !$omp end target teams distribute + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS DISTRIBUTE directive + !$omp target teams distribute if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp target teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + !$omp target teams distribute parallel do & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp& if(target: .true.) if(teams: .false.) if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + !ERROR: SIMD is not a constituent of the TARGET TEAMS DISTRIBUTE PARALLEL DO directive + !$omp target teams distribute parallel do if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp target teams distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + !$omp target teams distribute parallel do simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp& if(target: .true.) if(teams: .false.) if(parallel: .true.) & + !$omp& if(simd: .false.) + do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + !ERROR: TASK is not a constituent of the TARGET TEAMS DISTRIBUTE PARALLEL DO SIMD directive + !$omp target teams distribute parallel do simd if(task: .true.) 
+ do i = 1, 10 + end do + !$omp end target teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TARGET TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp target teams distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + !$omp target teams distribute simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp& if(target: .true.) if(teams: .false.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + !ERROR: PARALLEL is not a constituent of the TARGET TEAMS DISTRIBUTE SIMD directive + !$omp target teams distribute simd if(parallel: .true.) + do i = 1, 10 + end do + !$omp end target teams distribute simd + + ! ---------------------------------------------------------------------------- + ! TARGET UPDATE + ! ---------------------------------------------------------------------------- + !$omp target update to(i) if(.true.) + + !$omp target update to(i) if(target update: .true.) + + !ERROR: TARGET is not a constituent of the TARGET UPDATE directive + !$omp target update to(i) if(target: .true.) + + !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !$omp target update to(i) if(.true.) if(target update: .false.) + + ! ---------------------------------------------------------------------------- + ! TASK + ! ---------------------------------------------------------------------------- + !$omp task if(.true.) + !$omp end task + + !$omp task if(task: .true.) + !$omp end task + + !ERROR: TARGET is not a constituent of the TASK directive + !$omp task if(target: .true.) + !$omp end task + + !ERROR: At most one IF clause can appear on the TASK directive + !$omp task if(.true.) if(task: .false.) + !$omp end task + + ! ---------------------------------------------------------------------------- + ! 
TASKLOOP + ! ---------------------------------------------------------------------------- + !$omp taskloop if(.true.) + do i = 1, 10 + end do + !$omp end taskloop + + !$omp taskloop if(taskloop: .true.) + do i = 1, 10 + end do + !$omp end taskloop + + !ERROR: TARGET is not a constituent of the TASKLOOP directive + !$omp taskloop if(target: .true.) + do i = 1, 10 + end do + !$omp end taskloop + + !ERROR: At most one IF clause can appear on the TASKLOOP directive + !$omp taskloop if(.true.) if(taskloop: .false.) + do i = 1, 10 + end do + !$omp end taskloop + + ! ---------------------------------------------------------------------------- + ! TASKLOOP SIMD + ! ---------------------------------------------------------------------------- + !$omp taskloop simd if(.true.) + do i = 1, 10 + end do + !$omp end taskloop simd + + !$omp taskloop simd if(taskloop: .true.) if(simd: .false.) + do i = 1, 10 + end do + !$omp end taskloop simd + + !ERROR: TARGET is not a constituent of the TASKLOOP SIMD directive + !$omp taskloop simd if(target: .true.) + do i = 1, 10 + end do + !$omp end taskloop simd + + ! ---------------------------------------------------------------------------- + ! TEAMS + ! ---------------------------------------------------------------------------- + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams if(.true.) + !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams if(teams: .true.) + !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !ERROR: TARGET is not a constituent of the TEAMS directive + !$omp teams if(target: .true.) 
+ !$omp end teams + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !ERROR: At most one IF clause can appear on the TEAMS directive + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams if(.true.) if(teams: .false.) + !$omp end teams + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE + ! ---------------------------------------------------------------------------- + !$omp teams distribute if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams distribute if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE directive + !$omp teams distribute if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams distribute if(.true.) if(teams: .true.) + do i = 1, 10 + end do + !$omp end teams distribute + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO + ! ---------------------------------------------------------------------------- + !$omp teams distribute parallel do if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams distribute parallel do if(teams: .true.) if(parallel: .false.) 
+ do i = 1, 10 + end do + !$omp end teams distribute parallel do + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE PARALLEL DO directive + !$omp teams distribute parallel do if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE PARALLEL DO SIMD + ! ---------------------------------------------------------------------------- + !$omp teams distribute parallel do simd if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + !$omp teams distribute parallel do simd & + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp& if(teams: .true.) if(parallel: .true.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE PARALLEL DO SIMD directive + !$omp teams distribute parallel do simd if(target: .true.) + do i = 1, 10 + end do + !$omp end teams distribute parallel do simd + + ! ---------------------------------------------------------------------------- + ! TEAMS DISTRIBUTE SIMD + ! ---------------------------------------------------------------------------- + !$omp teams distribute simd if(.true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams distribute simd if(teams: .true.) if(simd: .true.) + do i = 1, 10 + end do + !$omp end teams distribute simd + + !ERROR: TARGET is not a constituent of the TEAMS DISTRIBUTE SIMD directive + !$omp teams distribute simd if(target: .true.) 
+ do i = 1, 10 + end do + !$omp end teams distribute simd +end program main diff --git a/flang/test/Semantics/OpenMP/symbol08.f90 b/flang/test/Semantics/OpenMP/symbol08.f90 index 545bccc..bf0f724 100644 --- a/flang/test/Semantics/OpenMP/symbol08.f90 +++ b/flang/test/Semantics/OpenMP/symbol08.f90 @@ -130,8 +130,7 @@ subroutine dotprod (b, c, n, block_size, num_teams, block_threads) !REF: /dotprod/sum sum = 0.0e0 !$omp target map(to:b,c) map(tofrom:sum) -!$omp teams num_teams(num_teams) thread_limit(block_threads) reduction(+: sum& -!$OMP&) +!$omp teams num_teams(num_teams) thread_limit(block_threads) reduction(+: sum) !$omp distribute !DEF: /dotprod/OtherConstruct1/OtherConstruct1/OtherConstruct1/i0 (OmpPrivate, OmpPreDetermined) HostAssoc INTEGER(4) !REF: /dotprod/n diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index 1c1f242..02fe1ad 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -432,6 +432,7 @@ add_proxy_header_library( cpu_set_t.h FULL_BUILD_DEPENDS libc.include.llvm-libc-types.cpu_set_t + libc.include.sched ) add_proxy_header_library( @@ -451,3 +452,12 @@ add_proxy_header_library( libc.include.llvm-libc-types.ACTION libc.include.search ) + +add_proxy_header_library( + struct_sched_param + HDRS + struct_sched_param.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.struct_sched_param + libc.include.sched +) diff --git a/libc/hdr/types/struct_sched_param.h b/libc/hdr/types/struct_sched_param.h new file mode 100644 index 0000000..b1bdcf9 --- /dev/null +++ b/libc/hdr/types/struct_sched_param.h @@ -0,0 +1,22 @@ +//===-- Proxy for struct sched_param --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_SCHED_PARAM_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_SCHED_PARAM_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_sched_param.h" + +#else // Overlay mode + +#include <sched.h> + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_SCHED_PARAM_H diff --git a/libc/shared/math.h b/libc/shared/math.h index 0605d91..ddf219e 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -24,8 +24,11 @@ #include "math/asinhf16.h" #include "math/atan.h" #include "math/atan2.h" +#include "math/atan2f.h" +#include "math/atan2f128.h" #include "math/atanf.h" #include "math/atanf16.h" +#include "math/atanhf.h" #include "math/erff.h" #include "math/exp.h" #include "math/exp10.h" diff --git a/libc/shared/math/atan2f.h b/libc/shared/math/atan2f.h new file mode 100644 index 0000000..2de09d2 --- /dev/null +++ b/libc/shared/math/atan2f.h @@ -0,0 +1,23 @@ +//===-- Shared atan2f function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F_H +#define LLVM_LIBC_SHARED_MATH_ATAN2F_H + +#include "shared/libc_common.h" +#include "src/__support/math/atan2f.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::atan2f; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ATAN2F_H diff --git a/libc/shared/math/atan2f128.h b/libc/shared/math/atan2f128.h new file mode 100644 index 0000000..d7aee40c --- /dev/null +++ b/libc/shared/math/atan2f128.h @@ -0,0 +1,29 @@ +//===-- Shared atan2f128 function -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ATAN2F128_H +#define LLVM_LIBC_SHARED_MATH_ATAN2F128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "shared/libc_common.h" +#include "src/__support/math/atan2f128.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::atan2f128; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SHARED_MATH_ATAN2F128_H diff --git a/libc/shared/math/atanhf.h b/libc/shared/math/atanhf.h new file mode 100644 index 0000000..763fb3e --- /dev/null +++ b/libc/shared/math/atanhf.h @@ -0,0 +1,23 @@ +//===-- Shared atanhf function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_ATANHF_H +#define LLVM_LIBC_SHARED_MATH_ATANHF_H + +#include "shared/libc_common.h" +#include "src/__support/math/atanhf.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::atanhf; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_ATANHF_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index bbb07b6..500dd9d 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -214,6 +214,38 @@ add_header_library( ) add_header_library( + atan2f + HDRS + atan2f_float.h + atan2f.h + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.double_double + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.macros.config + libc.src.__support.macros.optimization +) + +add_header_library( + atan2f128 + HDRS + atan2f128.h + DEPENDS + .atan_utils + libc.src.__support.integer_literals + libc.src.__support.uint128 + libc.src.__support.FPUtil.dyadic_float + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.macros.optimization +) + +add_header_library( atanf HDRS atanf.h @@ -244,6 +276,17 @@ add_header_library( ) add_header_library( + atanhf + HDRS + atanhf.h + DEPENDS + .acoshf_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.macros.optimization +) + +add_header_library( asinf HDRS asinf.h diff --git a/libc/src/__support/math/atan2f.h b/libc/src/__support/math/atan2f.h new file mode 100644 index 0000000..e3b1932 --- /dev/null +++ 
b/libc/src/__support/math/atan2f.h @@ -0,0 +1,351 @@ +//===-- Implementation header for atan2f ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H + +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ + defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) + +// We use float-float implementation to reduce size. 
+#include "atan2f_float.h" + +#else + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +namespace atan2f_internal { + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + +// Lookup tables for the accurate pass: + +// atan(i/16) with i = 0..16, generated by Sollya with: +// > for i from 0 to 16 do { +// a = round(atan(i/16), D, RN); +// b = round(atan(i/16) - a, D, RN); +// print("{", b, ",", a, "},"); +// }; +static constexpr fputil::DoubleDouble ATAN_I[17] = { + {0.0, 0.0}, + {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5}, + {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4}, + {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3}, + {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3}, + {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2}, + {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2}, + {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2}, + {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2}, + {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1}, + {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1}, + {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1}, + {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1}, + {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1}, + {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1}, + {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1}, + {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1}, +}; + +// Taylor polynomial, generated by Sollya with: +// > for i from 0 to 8 do { +// j = (-1)^i/(2*i + 1); +// a = round(j, D, RN); +// b = round(j - a, D, RN); +// print("{", b, ",", a, "},"); +// }; +static constexpr fputil::DoubleDouble COEFFS[9] = { + {0.0, 1.0}, // 1 + {-0x1.5555555555555p-56, -0x1.5555555555555p-2}, // -1/3 + {-0x1.999999999999ap-57, 0x1.999999999999ap-3}, // 1/5 + {-0x1.2492492492492p-57, -0x1.2492492492492p-3}, // -1/7 + {0x1.c71c71c71c71cp-58, 0x1.c71c71c71c71cp-4}, // 1/9 + {0x1.745d1745d1746p-59, -0x1.745d1745d1746p-4}, // -1/11 + {-0x1.3b13b13b13b14p-58, 0x1.3b13b13b13b14p-4}, // 1/13 + {-0x1.1111111111111p-60, -0x1.1111111111111p-4}, // -1/15 + {0x1.e1e1e1e1e1e1ep-61, 
+ 0x1.e1e1e1e1e1e1ep-5}, // 1/17 +}; + +// Veltkamp's splitting of a double precision into hi + lo, where the hi part is +// slightly smaller than an even split, so that the product of +// hi * (s1 * k + s2) is exact, +// where: +// s1, s2 are single precision, +// 1/16 <= s1/s2 <= 1 +// 1/16 <= k <= 1 and 16 * k is an integer. +// So the maximal precision of (s1 * k + s2) is: +// prec(s1 * k + s2) = 2 + log2(msb(s2)) - log2(lsb(k_d * s1)) +// = 2 + log2(msb(s1)) + 4 - log2(lsb(k_d)) - log2(lsb(s1)) +// = 2 + log2(lsb(s1)) + 23 + 4 - (-4) - log2(lsb(s1)) +// = 33. +// Thus, the Veltkamp splitting constant is C = 2^33 + 1. +// This is used when FMA instruction is not available. +[[maybe_unused]] LIBC_INLINE static constexpr fputil::DoubleDouble +split_d(double a) { + fputil::DoubleDouble r{0.0, 0.0}; + constexpr double C = 0x1.0p33 + 1.0; + double t1 = C * a; + double t2 = a - t1; + r.hi = t1 + t2; + r.lo = a - r.hi; + return r; +} + +// Compute atan( num_d / den_d ) in double-double precision. +// num_d = min(|x|, |y|) +// den_d = max(|x|, |y|) +// q_d = num_d / den_d +// idx, k_d = round( 2^4 * num_d / den_d ) +// final_sign = sign of the final result +// const_term = the constant term in the final expression. +LIBC_INLINE static float +atan2f_double_double(double num_d, double den_d, double q_d, int idx, + double k_d, double final_sign, + const fputil::DoubleDouble &const_term) { + fputil::DoubleDouble q; + double num_r = 0, den_r = 0; + + if (idx != 0) { + // The following range reduction is accurate even without fma for + // 1/16 <= n/d <= 1. + // atan(n/d) - atan(idx/16) = atan((n/d - idx/16) / (1 + (n/d) * (idx/16))) + // = atan((n - d*(idx/16)) / (d + n*idx/16)) + k_d *= 0x1.0p-4; + num_r = fputil::multiply_add(k_d, -den_d, num_d); // Exact + den_r = fputil::multiply_add(k_d, num_d, den_d); // Exact + q.hi = num_r / den_r; + } else { + // For 0 < n/d < 1/16, we just need to calculate the lower part of their + // quotient. 
+ q.hi = q_d; + num_r = num_d; + den_r = den_d; + } +#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE + q.lo = fputil::multiply_add(q.hi, -den_r, num_r) / den_r; +#else + // Compute `(num_r - q.hi * den_r) / den_r` accurately without FMA + // instructions. + fputil::DoubleDouble q_hi_dd = split_d(q.hi); + double t1 = fputil::multiply_add(q_hi_dd.hi, -den_r, num_r); // Exact + double t2 = fputil::multiply_add(q_hi_dd.lo, -den_r, t1); + q.lo = t2 / den_r; +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE + + // Taylor polynomial, evaluated using Horner's scheme: + // P = x - x^3/3 + x^5/5 - x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 + // + x^17/17 + // = x*(1 + x^2*(-1/3 + x^2*(1/5 + x^2*(-1/7 + x^2*(1/9 + x^2* + // (-1/11 + x^2*(1/13 + x^2*(-1/15 + x^2 * 1/17)))))))) + fputil::DoubleDouble q2 = fputil::quick_mult(q, q); + fputil::DoubleDouble p_dd = + fputil::polyeval(q2, COEFFS[0], COEFFS[1], COEFFS[2], COEFFS[3], + COEFFS[4], COEFFS[5], COEFFS[6], COEFFS[7], COEFFS[8]); + fputil::DoubleDouble r_dd = + fputil::add(const_term, fputil::multiply_add(q, p_dd, ATAN_I[idx])); + r_dd.hi *= final_sign; + r_dd.lo *= final_sign; + + // Make sure the sum is normalized: + fputil::DoubleDouble rr = fputil::exact_add(r_dd.hi, r_dd.lo); + // Round to odd. 
+ uint64_t rr_bits = cpp::bit_cast<uint64_t>(rr.hi); + if (LIBC_UNLIKELY(((rr_bits & 0xfff'ffff) == 0) && (rr.lo != 0.0))) { + Sign hi_sign = fputil::FPBits<double>(rr.hi).sign(); + Sign lo_sign = fputil::FPBits<double>(rr.lo).sign(); + if (hi_sign == lo_sign) { + ++rr_bits; + } else if ((rr_bits & fputil::FPBits<double>::FRACTION_MASK) > 0) { + --rr_bits; + } + } + + return static_cast<float>(cpp::bit_cast<double>(rr_bits)); +} + +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + +} // namespace atan2f_internal + +// There are several range reduction steps we can take for atan2(y, x) as +// follows: + +// * Range reduction 1: signs +// atan2(y, x) will return a number between -PI and PI representing the angle +// formed by the Ox axis and the vector (x, y) on the Oxy-plane. +// In particular, we have that: +// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) +// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) +// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) +// Since the atan function is odd, we can use the formula: +// atan(-u) = -atan(u) +// to adjust the above conditions a bit further: +// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) +// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) +// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) +// Which can be simplified to: +// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 +// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 + +// * Range reduction 2: reciprocal +// Now that the argument inside atan is positive, we can use the formula: +// atan(1/x) = pi/2 - atan(x) +// to make the argument inside atan <= 1 as follows: +// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x +// = sign(y) * (pi/2 - atan( |x|/|y| )) if 0 <= x < |y| +// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x +// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| + +// * 
Range reduction 3: lookup table. +// After the previous two range reduction steps, we reduce the problem to +// computing atan(u) with 0 <= u <= 1, or to be precise: +// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). +// An accurate polynomial approximation for the whole [0, 1] input range will +// require a very large degree. To make it more efficient, we reduce the input +// range further by finding an integer idx such that: +// | n/d - idx/16 | <= 1/32. +// In particular, +// idx := round(2^4 * n/d) +// Then for the fast pass, we find a polynomial approximation for: +// atan( n/d ) ~ atan( idx/16 ) + (n/d - idx/16) * Q(n/d - idx/16) +// For the accurate pass, we use the addition formula: +// atan( n/d ) - atan( idx/16 ) = atan( (n/d - idx/16)/(1 + (n*idx)/(16*d)) ) +// = atan( (n - d * idx/16)/(d + n * idx/16) ) +// And finally we use a Taylor polynomial to compute the RHS in the accurate pass: +// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 - u^11/11 + u^13/13 +// - u^15/15 + u^17/17 +// Its error in double-double precision is estimated in Sollya to be: +// > P = x - x^3/3 + x^5/5 - x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 +// + x^17/17; +// > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]); +// 0x1.aec6f...p-100 +// which is about the rounding error of double-double (2^-104). 
+ +LIBC_INLINE static constexpr float atan2f(float y, float x) { + using namespace atan2f_internal; + using namespace inv_trigf_utils_internal; + using FPBits = typename fputil::FPBits<float>; + constexpr double IS_NEG[2] = {1.0, -1.0}; + constexpr double PI = 0x1.921fb54442d18p1; + constexpr double PI_LO = 0x1.1a62633145c07p-53; + constexpr double PI_OVER_4 = 0x1.921fb54442d18p-1; + constexpr double PI_OVER_2 = 0x1.921fb54442d18p0; + constexpr double THREE_PI_OVER_4 = 0x1.2d97c7f3321d2p+1; + // Adjustment for constant term: + // CONST_ADJ[x_sign][y_sign][recip] + constexpr fputil::DoubleDouble CONST_ADJ[2][2][2] = { + {{{0.0, 0.0}, {-PI_LO / 2, -PI_OVER_2}}, + {{-0.0, -0.0}, {-PI_LO / 2, -PI_OVER_2}}}, + {{{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}, + {{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}}}; + + FPBits x_bits(x), y_bits(y); + bool x_sign = x_bits.sign().is_neg(); + bool y_sign = y_bits.sign().is_neg(); + x_bits.set_sign(Sign::POS); + y_bits.set_sign(Sign::POS); + uint32_t x_abs = x_bits.uintval(); + uint32_t y_abs = y_bits.uintval(); + uint32_t max_abs = x_abs > y_abs ? x_abs : y_abs; + uint32_t min_abs = x_abs <= y_abs ? x_abs : y_abs; + float num_f = FPBits(min_abs).get_val(); + float den_f = FPBits(max_abs).get_val(); + double num_d = static_cast<double>(num_f); + double den_d = static_cast<double>(den_f); + + if (LIBC_UNLIKELY(max_abs >= 0x7f80'0000U || num_d == 0.0)) { + if (x_bits.is_nan() || y_bits.is_nan()) { + if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + double x_d = static_cast<double>(x); + double y_d = static_cast<double>(y); + size_t x_except = (x_d == 0.0) ? 0 : (x_abs == 0x7f80'0000 ? 2 : 1); + size_t y_except = (y_d == 0.0) ? 0 : (y_abs == 0x7f80'0000 ? 
2 : 1); + + // Exceptional cases: + // EXCEPTS[y_except][x_except][x_is_neg] + // with x_except & y_except: + // 0: zero + // 1: finite, non-zero + // 2: infinity + constexpr double EXCEPTS[3][3][2] = { + {{0.0, PI}, {0.0, PI}, {0.0, PI}}, + {{PI_OVER_2, PI_OVER_2}, {0.0, 0.0}, {0.0, PI}}, + {{PI_OVER_2, PI_OVER_2}, + {PI_OVER_2, PI_OVER_2}, + {PI_OVER_4, THREE_PI_OVER_4}}, + }; + + double r = IS_NEG[y_sign] * EXCEPTS[y_except][x_except][x_sign]; + + return static_cast<float>(r); + } + + bool recip = x_abs < y_abs; + double final_sign = IS_NEG[(x_sign != y_sign) != recip]; + fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; + double q_d = num_d / den_d; + + double k_d = fputil::nearest_integer(q_d * 0x1.0p4); + int idx = static_cast<int>(k_d); + double r = 0.0; + +#ifdef LIBC_MATH_HAS_SMALL_TABLES + double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4); + r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx])); +#else + q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d); + + double p = atan_eval(q_d, idx); + r = final_sign * + fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]); +#endif // LIBC_MATH_HAS_SMALL_TABLES + +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return static_cast<float>(r); +#else + constexpr uint32_t LOWER_ERR = 4; + // Mask sticky bits in double precision before rounding to single precision. + constexpr uint32_t MASK = + mask_trailing_ones<uint32_t, fputil::FPBits<double>::SIG_LEN - + FPBits::SIG_LEN - 1>(); + constexpr uint32_t UPPER_ERR = MASK - LOWER_ERR; + + uint32_t r_bits = static_cast<uint32_t>(cpp::bit_cast<uint64_t>(r)) & MASK; + + // Ziv's rounding test. 
+ if (LIBC_LIKELY(r_bits > LOWER_ERR && r_bits < UPPER_ERR)) + return static_cast<float>(r); + + return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign, + const_term); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // SKIP_ACCURATE_PASS && INTERMEDIATE_COMP_IN_FLOAT + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F_H diff --git a/libc/src/__support/math/atan2f128.h b/libc/src/__support/math/atan2f128.h new file mode 100644 index 0000000..89efaf1 --- /dev/null +++ b/libc/src/__support/math/atan2f128.h @@ -0,0 +1,212 @@ +//===-- Implementation header for atan2f128 ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H + +#include "include/llvm-libc-types/float128.h" + +#ifdef LIBC_TYPES_HAS_FLOAT128 + +#include "atan_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/dyadic_float.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/integer_literals.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/uint128.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +// There are several range reduction steps we can take for atan2(y, x) as +// follows: + +// * Range reduction 1: signs +// atan2(y, x) will return a number between -PI and PI representing the angle +// formed by the Ox axis and the vector (x, y) on the Oxy-plane. 
+// In particular, we have that: +// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) +// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) +// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) +// Since the atan function is odd, we can use the formula: +// atan(-u) = -atan(u) +// to adjust the above conditions a bit further: +// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) +// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) +// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) +// Which can be simplified to: +// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 +// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 + +// * Range reduction 2: reciprocal +// Now that the argument inside atan is positive, we can use the formula: +// atan(1/x) = pi/2 - atan(x) +// to make the argument inside atan <= 1 as follows: +// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x +// = sign(y) * (pi/2 - atan( |x|/|y| )) if 0 <= x < |y| +// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x +// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| + +// * Range reduction 3: lookup table. +// After the previous two range reduction steps, we reduce the problem to +// computing atan(u) with 0 <= u <= 1, or to be precise: +// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). +// An accurate polynomial approximation for the whole [0, 1] input range will +// require a very large degree. To make it more efficient, we reduce the input +// range further by finding an integer idx such that: +// | n/d - idx/64 | <= 1/128. 
+// In particular, +// idx := round(2^6 * n/d) +// Then for the fast pass, we find a polynomial approximation for: +// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64) +// For the accurate pass, we use the addition formula: +// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) ) +// = atan( (n - d*(idx/64))/(d + n*(idx/64)) ) +// And for the fast pass, we use a degree-13 minimax polynomial to compute the +// RHS: +// atan(u) ~ P(u) = u - c_3 * u^3 + c_5 * u^5 - c_7 * u^7 + c_9 * u^9 +// - c_11 * u^11 + c_13 * u^13 +// with absolute errors bounded by: +// |atan(u) - P(u)| < 2^-121 +// and relative errors bounded by: +// |(atan(u) - P(u)) / P(u)| < 2^-114. + +LIBC_INLINE static constexpr float128 atan2f128(float128 y, float128 x) { + using Float128 = fputil::DyadicFloat<128>; + + constexpr Float128 ZERO = {Sign::POS, 0, 0_u128}; + constexpr Float128 MZERO = {Sign::NEG, 0, 0_u128}; + constexpr Float128 PI = {Sign::POS, -126, + 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; + constexpr Float128 MPI = {Sign::NEG, -126, + 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; + constexpr Float128 PI_OVER_2 = {Sign::POS, -127, + 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; + constexpr Float128 MPI_OVER_2 = {Sign::NEG, -127, + 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; + constexpr Float128 PI_OVER_4 = {Sign::POS, -128, + 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; + // 3*pi/4 lies in [2, 4), the same binade as PI, so it shares PI's exponent. + constexpr Float128 THREE_PI_OVER_4 = { + Sign::POS, -126, 0x96cbe3f9'990e91a7'9394c9e8'a0a5159d_u128}; + + // Adjustment for constant term: + // CONST_ADJ[x_sign][y_sign][recip] + constexpr Float128 CONST_ADJ[2][2][2] = { + {{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}}, + {{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}}; + + using namespace atan_internal; + using FPBits = fputil::FPBits<float128>; + + FPBits x_bits(x), y_bits(y); + bool x_sign = x_bits.sign().is_neg(); + bool y_sign = y_bits.sign().is_neg(); + x_bits = x_bits.abs(); + y_bits = 
y_bits.abs(); + UInt128 x_abs = x_bits.uintval(); + UInt128 y_abs = y_bits.uintval(); + bool recip = x_abs < y_abs; + UInt128 min_abs = recip ? x_abs : y_abs; + UInt128 max_abs = !recip ? x_abs : y_abs; + unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); + unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); + + Float128 num(FPBits(min_abs).get_val()); + Float128 den(FPBits(max_abs).get_val()); + + // Check for exceptional cases, whether inputs are 0, inf, nan, or close to + // overflow, or close to underflow. + if (LIBC_UNLIKELY(max_exp >= 0x7fffU || min_exp == 0U)) { + if (x_bits.is_nan() || y_bits.is_nan()) + return FPBits::quiet_nan().get_val(); + unsigned x_except = x == 0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); + unsigned y_except = y == 0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1); + + // Exceptional cases: + // EXCEPTS[y_except][x_except][x_is_neg] + // with x_except & y_except: + // 0: zero + // 1: finite, non-zero + // 2: infinity + constexpr Float128 EXCEPTS[3][3][2] = { + {{ZERO, PI}, {ZERO, PI}, {ZERO, PI}}, + {{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}}, + {{PI_OVER_2, PI_OVER_2}, + {PI_OVER_2, PI_OVER_2}, + {PI_OVER_4, THREE_PI_OVER_4}}, + }; + + if ((x_except != 1) || (y_except != 1)) { + Float128 r = EXCEPTS[y_except][x_except][x_sign]; + if (y_sign) + r.sign = r.sign.negate(); + return static_cast<float128>(r); + } + } + + bool final_sign = ((x_sign != y_sign) != recip); + Float128 const_term = CONST_ADJ[x_sign][y_sign][recip]; + int exp_diff = den.exponent - num.exponent; + // We have the following bound for normalized n and d: + // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). + if (LIBC_UNLIKELY(exp_diff > FPBits::FRACTION_LEN + 2)) { + // NOTE(review): when const_term is zero (x > 0 and |y| <= x) this returns + // +-0 rather than ~n/d -- confirm that this is intended. 
+ if (final_sign) + const_term.sign = const_term.sign.negate(); + return static_cast<float128>(const_term); + } + + // Take 24 leading bits of num and den to convert to float for fast division. 
+ // We also multiply the numerator by 64 using integer addition directly to the + // exponent field. + float num_f = + cpp::bit_cast<float>(static_cast<uint32_t>(num.mantissa >> 104) + + (6U << fputil::FPBits<float>::FRACTION_LEN)); + float den_f = cpp::bit_cast<float>( + static_cast<uint32_t>(den.mantissa >> 104) + + (static_cast<uint32_t>(exp_diff) << fputil::FPBits<float>::FRACTION_LEN)); + + float k = fputil::nearest_integer(num_f / den_f); + unsigned idx = static_cast<unsigned>(k); + + // k_f128 = idx / 64 + Float128 k_f128(Sign::POS, -6, Float128::MantissaType(idx)); + + // Range reduction, with k = idx: + // atan(n/d) - atan(k/64) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) + // = atan((n - d * k/64) / (d + n * k/64)) + // num_f128 = n - d * k/64 + Float128 num_f128 = fputil::multiply_add(den, -k_f128, num); + // den_f128 = d + n * k/64 + Float128 den_f128 = fputil::multiply_add(num, k_f128, den); + + // q = (n - d * k/64) / (d + n * k/64) + Float128 q = fputil::quick_mul(num_f128, fputil::approx_reciprocal(den_f128)); + // p ~ atan(q) + Float128 p = atan_eval(q); + + Float128 r = + fputil::quick_add(const_term, fputil::quick_add(ATAN_I_F128[idx], p)); + if (final_sign) + r.sign = r.sign.negate(); + + return static_cast<float128>(r); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT128 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATAN2F128_H diff --git a/libc/src/math/generic/atan2f_float.h b/libc/src/__support/math/atan2f_float.h index 1fd853d..8bd7095 100644 --- a/libc/src/math/generic/atan2f_float.h +++ b/libc/src/__support/math/atan2f_float.h @@ -1,4 +1,4 @@ -//===-- Single-precision atan2f function ----------------------------------===// +//===-- Single-precision atan2f float function ----------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,18 +6,21 @@ // //===----------------------------------------------------------------------===// +#ifndef LIBC_SRC___SUPPORT_MATH_ATAN2F_FLOAT_H +#define LIBC_SRC___SUPPORT_MATH_ATAN2F_FLOAT_H + #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/double_double.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" #include "src/__support/macros/config.h" #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/atan2f.h" namespace LIBC_NAMESPACE_DECL { -namespace { +namespace math { + +namespace atan2f_internal { using FloatFloat = fputil::FloatFloat; @@ -27,7 +30,7 @@ using FloatFloat = fputil::FloatFloat; // b = round(atan(i/16) - a, SG, RN); // print("{", b, ",", a, "},"); // }; -constexpr FloatFloat ATAN_I[17] = { +static constexpr FloatFloat ATAN_I[17] = { {0.0f, 0.0f}, {-0x1.1a6042p-30f, 0x1.ff55bcp-5f}, {-0x1.54f424p-30f, 0x1.fd5baap-4f}, @@ -57,7 +60,7 @@ constexpr FloatFloat ATAN_I[17] = { // For x = x_hi + x_lo, fully expand the polynomial and drop any terms less than // ulp(x_hi^3 / 3) gives us: // P(x) ~ x_hi - x_hi^3/3 + x_lo * (1 - x_hi^2) -FloatFloat atan_eval(const FloatFloat &x) { +LIBC_INLINE static constexpr FloatFloat atan_eval(const FloatFloat &x) { FloatFloat p; p.hi = x.hi; float x_hi_sq = x.hi * x.hi; @@ -70,7 +73,7 @@ FloatFloat atan_eval(const FloatFloat &x) { return p; } -} // anonymous namespace +} // namespace atan2f_internal // There are several range reduction steps we can take for atan2(y, x) as // follow: @@ -121,7 +124,8 @@ FloatFloat atan_eval(const FloatFloat &x) { // > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]); // 0x1.995...p-28. 
-LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { +LIBC_INLINE static constexpr float atan2f(float y, float x) { + using namespace atan2f_internal; using FPBits = typename fputil::FPBits<float>; constexpr float IS_NEG[2] = {1.0f, -1.0f}; constexpr FloatFloat ZERO = {0.0f, 0.0f}; @@ -234,4 +238,8 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { return final_sign * r.hi; } +} // namespace math + } // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_SRC___SUPPORT_MATH_ATAN2F_FLOAT_H diff --git a/libc/src/__support/math/atanhf.h b/libc/src/__support/math/atanhf.h new file mode 100644 index 0000000..b3ee5bb --- /dev/null +++ b/libc/src/__support/math/atanhf.h @@ -0,0 +1,76 @@ +//===-- Implementation header for atanhf ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H + +#include "acoshf_utils.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float atanhf(float x) { + using namespace acoshf_internal; + using FPBits = typename fputil::FPBits<float>; + + FPBits xbits(x); + Sign sign = xbits.sign(); + uint32_t x_abs = xbits.abs().uintval(); + + // |x| >= 1.0, infinity, or NaN + if (LIBC_UNLIKELY(x_abs >= 0x3F80'0000U)) { + if (xbits.is_nan()) { + if (xbits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + return x; + } + // |x| == 1.0 + if (x_abs == 0x3F80'0000U) { + fputil::set_errno_if_required(ERANGE); + 
fputil::raise_except_if_required(FE_DIVBYZERO); + return FPBits::inf(sign).get_val(); + } else { + fputil::set_errno_if_required(EDOM); + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + } + + // |x| <= 0x1.98p-4 (~0.0996) + if (LIBC_UNLIKELY(x_abs <= 0x3dcc'0000U)) { + // |x| <= 2^-26 + if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { + return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x + 0x1.5555555555555p-2 * x * x * x)); + } + + double xdbl = x; + double x2 = xdbl * xdbl; + // Pure Taylor series: atanh(x) ~ x + x^3/3 + x^5/5 + ... + x^11/11. + double pe = fputil::polyeval(x2, 0.0, 0x1.5555555555555p-2, + 0x1.999999999999ap-3, 0x1.2492492492492p-3, + 0x1.c71c71c71c71cp-4, 0x1.745d1745d1746p-4); + return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl)); + } + double xdbl = x; + return static_cast<float>(0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0))); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANHF_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 6bcb1e2..bac043f 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3922,10 +3922,7 @@ add_entrypoint_object( HDRS ../atanhf.h DEPENDS - .explogxf - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.macros.optimization + libc.src.__support.math.atanhf ) add_entrypoint_object( @@ -4058,18 +4055,8 @@ add_entrypoint_object( atan2f.cpp HDRS ../atan2f.h - atan2f_float.h DEPENDS - libc.hdr.fenv_macros - libc.src.__support.FPUtil.double_double - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.math.inv_trigf_utils + libc.src.__support.math.atan2f ) add_entrypoint_object( @@ -4099,15 
+4086,7 @@ add_entrypoint_object( HDRS ../atan2f128.h DEPENDS - libc.src.__support.math.atan_utils - libc.src.__support.integer_literals - libc.src.__support.uint128 - libc.src.__support.FPUtil.dyadic_float - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.macros.optimization - libc.src.__support.macros.properties.types + libc.src.__support.math.atan2f128 ) add_entrypoint_object( diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp index 32b977f..7c56788 100644 --- a/libc/src/math/generic/atan2f.cpp +++ b/libc/src/math/generic/atan2f.cpp @@ -7,336 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/atan2f.h" -#include "hdr/fenv_macros.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/double_double.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/math/inv_trigf_utils.h" - -#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \ - defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT) - -// We use float-float implementation to reduce size. 
-#include "src/math/generic/atan2f_float.h" - -#else +#include "src/__support/math/atan2f.h" namespace LIBC_NAMESPACE_DECL { -namespace { - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -// Look up tables for accurate pass: - -// atan(i/16) with i = 0..16, generated by Sollya with: -// > for i from 0 to 16 do { -// a = round(atan(i/16), D, RN); -// b = round(atan(i/16) - a, D, RN); -// print("{", b, ",", a, "},"); -// }; -constexpr fputil::DoubleDouble ATAN_I[17] = { - {0.0, 0.0}, - {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5}, - {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4}, - {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3}, - {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3}, - {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2}, - {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2}, - {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2}, - {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2}, - {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1}, - {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1}, - {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1}, - {0x1.2419a87f2a458p-56, 0x1.4978fa3269ee1p-1}, - {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1}, - {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1}, - {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1}, - {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1}, -}; - -// Taylor polynomial, generated by Sollya with: -// > for i from 0 to 8 do { -// j = (-1)^(i + 1)/(2*i + 1); -// a = round(j, D, RN); -// b = round(j - a, D, RN); -// print("{", b, ",", a, "},"); -// }; -constexpr fputil::DoubleDouble COEFFS[9] = { - {0.0, 1.0}, // 1 - {-0x1.5555555555555p-56, -0x1.5555555555555p-2}, // -1/3 - {-0x1.999999999999ap-57, 0x1.999999999999ap-3}, // 1/5 - {-0x1.2492492492492p-57, -0x1.2492492492492p-3}, // -1/7 - {0x1.c71c71c71c71cp-58, 0x1.c71c71c71c71cp-4}, // 1/9 - {0x1.745d1745d1746p-59, -0x1.745d1745d1746p-4}, // -1/11 - {-0x1.3b13b13b13b14p-58, 0x1.3b13b13b13b14p-4}, // 1/13 - {-0x1.1111111111111p-60, -0x1.1111111111111p-4}, // -1/15 - {0x1.e1e1e1e1e1e1ep-61, 
0x1.e1e1e1e1e1e1ep-5}, // 1/17 -}; - -// Veltkamp's splitting of a double precision into hi + lo, where the hi part is -// slightly smaller than an even split, so that the product of -// hi * (s1 * k + s2) is exact, -// where: -// s1, s2 are single precsion, -// 1/16 <= s1/s2 <= 1 -// 1/16 <= k <= 1 is an integer. -// So the maximal precision of (s1 * k + s2) is: -// prec(s1 * k + s2) = 2 + log2(msb(s2)) - log2(lsb(k_d * s1)) -// = 2 + log2(msb(s1)) + 4 - log2(lsb(k_d)) - log2(lsb(s1)) -// = 2 + log2(lsb(s1)) + 23 + 4 - (-4) - log2(lsb(s1)) -// = 33. -// Thus, the Veltkamp splitting constant is C = 2^33 + 1. -// This is used when FMA instruction is not available. -[[maybe_unused]] constexpr fputil::DoubleDouble split_d(double a) { - fputil::DoubleDouble r{0.0, 0.0}; - constexpr double C = 0x1.0p33 + 1.0; - double t1 = C * a; - double t2 = a - t1; - r.hi = t1 + t2; - r.lo = a - r.hi; - return r; -} - -// Compute atan( num_d / den_d ) in double-double precision. -// num_d = min(|x|, |y|) -// den_d = max(|x|, |y|) -// q_d = num_d / den_d -// idx, k_d = round( 2^4 * num_d / den_d ) -// final_sign = sign of the final result -// const_term = the constant term in the final expression. -float atan2f_double_double(double num_d, double den_d, double q_d, int idx, - double k_d, double final_sign, - const fputil::DoubleDouble &const_term) { - fputil::DoubleDouble q; - double num_r, den_r; - - if (idx != 0) { - // The following range reduction is accurate even without fma for - // 1/16 <= n/d <= 1. - // atan(n/d) - atan(idx/16) = atan((n/d - idx/16) / (1 + (n/d) * (idx/16))) - // = atan((n - d*(idx/16)) / (d + n*idx/16)) - k_d *= 0x1.0p-4; - num_r = fputil::multiply_add(k_d, -den_d, num_d); // Exact - den_r = fputil::multiply_add(k_d, num_d, den_d); // Exact - q.hi = num_r / den_r; - } else { - // For 0 < n/d < 1/16, we just need to calculate the lower part of their - // quotient. 
- q.hi = q_d; - num_r = num_d; - den_r = den_d; - } -#ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE - q.lo = fputil::multiply_add(q.hi, -den_r, num_r) / den_r; -#else - // Compute `(num_r - q.hi * den_r) / den_r` accurately without FMA - // instructions. - fputil::DoubleDouble q_hi_dd = split_d(q.hi); - double t1 = fputil::multiply_add(q_hi_dd.hi, -den_r, num_r); // Exact - double t2 = fputil::multiply_add(q_hi_dd.lo, -den_r, t1); - q.lo = t2 / den_r; -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE - - // Taylor polynomial, evaluating using Horner's scheme: - // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 - // + x^17/17 - // = x*(1 + x^2*(-1/3 + x^2*(1/5 + x^2*(-1/7 + x^2*(1/9 + x^2* - // *(-1/11 + x^2*(1/13 + x^2*(-1/15 + x^2 * 1/17)))))))) - fputil::DoubleDouble q2 = fputil::quick_mult(q, q); - fputil::DoubleDouble p_dd = - fputil::polyeval(q2, COEFFS[0], COEFFS[1], COEFFS[2], COEFFS[3], - COEFFS[4], COEFFS[5], COEFFS[6], COEFFS[7], COEFFS[8]); - fputil::DoubleDouble r_dd = - fputil::add(const_term, fputil::multiply_add(q, p_dd, ATAN_I[idx])); - r_dd.hi *= final_sign; - r_dd.lo *= final_sign; - - // Make sure the sum is normalized: - fputil::DoubleDouble rr = fputil::exact_add(r_dd.hi, r_dd.lo); - // Round to odd. 
- uint64_t rr_bits = cpp::bit_cast<uint64_t>(rr.hi); - if (LIBC_UNLIKELY(((rr_bits & 0xfff'ffff) == 0) && (rr.lo != 0.0))) { - Sign hi_sign = fputil::FPBits<double>(rr.hi).sign(); - Sign lo_sign = fputil::FPBits<double>(rr.lo).sign(); - if (hi_sign == lo_sign) { - ++rr_bits; - } else if ((rr_bits & fputil::FPBits<double>::FRACTION_MASK) > 0) { - --rr_bits; - } - } - - return static_cast<float>(cpp::bit_cast<double>(rr_bits)); -} - -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - -} // anonymous namespace - -// There are several range reduction steps we can take for atan2(y, x) as -// follow: - -// * Range reduction 1: signness -// atan2(y, x) will return a number between -PI and PI representing the angle -// forming by the 0x axis and the vector (x, y) on the 0xy-plane. -// In particular, we have that: -// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) -// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) -// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) -// Since atan function is odd, we can use the formula: -// atan(-u) = -atan(u) -// to adjust the above conditions a bit further: -// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) -// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) -// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) -// Which can be simplified to: -// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 -// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 - -// * Range reduction 2: reciprocal -// Now that the argument inside atan is positive, we can use the formula: -// atan(1/x) = pi/2 - atan(x) -// to make the argument inside atan <= 1 as follow: -// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x -// = sign(y) * (pi/2 - atan( |x|/|y| ) if 0 <= x < |y| -// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x -// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| - -// * Range 
reduction 3: look up table. -// After the previous two range reduction steps, we reduce the problem to -// compute atan(u) with 0 <= u <= 1, or to be precise: -// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). -// An accurate polynomial approximation for the whole [0, 1] input range will -// require a very large degree. To make it more efficient, we reduce the input -// range further by finding an integer idx such that: -// | n/d - idx/16 | <= 1/32. -// In particular, -// idx := 2^-4 * round(2^4 * n/d) -// Then for the fast pass, we find a polynomial approximation for: -// atan( n/d ) ~ atan( idx/16 ) + (n/d - idx/16) * Q(n/d - idx/16) -// For the accurate pass, we use the addition formula: -// atan( n/d ) - atan( idx/16 ) = atan( (n/d - idx/16)/(1 + (n*idx)/(16*d)) ) -// = atan( (n - d * idx/16)/(d + n * idx/16) ) -// And finally we use Taylor polynomial to compute the RHS in the accurate pass: -// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 - u^11/11 + u^13/13 - -// - u^15/15 + u^17/17 -// It's error in double-double precision is estimated in Sollya to be: -// > P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 -// + x^17/17; -// > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]); -// 0x1.aec6f...p-100 -// which is about rounding errors of double-double (2^-104). 
- LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { - using namespace inv_trigf_utils_internal; - using FPBits = typename fputil::FPBits<float>; - constexpr double IS_NEG[2] = {1.0, -1.0}; - constexpr double PI = 0x1.921fb54442d18p1; - constexpr double PI_LO = 0x1.1a62633145c07p-53; - constexpr double PI_OVER_4 = 0x1.921fb54442d18p-1; - constexpr double PI_OVER_2 = 0x1.921fb54442d18p0; - constexpr double THREE_PI_OVER_4 = 0x1.2d97c7f3321d2p+1; - // Adjustment for constant term: - // CONST_ADJ[x_sign][y_sign][recip] - constexpr fputil::DoubleDouble CONST_ADJ[2][2][2] = { - {{{0.0, 0.0}, {-PI_LO / 2, -PI_OVER_2}}, - {{-0.0, -0.0}, {-PI_LO / 2, -PI_OVER_2}}}, - {{{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}, - {{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}}}; - - FPBits x_bits(x), y_bits(y); - bool x_sign = x_bits.sign().is_neg(); - bool y_sign = y_bits.sign().is_neg(); - x_bits.set_sign(Sign::POS); - y_bits.set_sign(Sign::POS); - uint32_t x_abs = x_bits.uintval(); - uint32_t y_abs = y_bits.uintval(); - uint32_t max_abs = x_abs > y_abs ? x_abs : y_abs; - uint32_t min_abs = x_abs <= y_abs ? x_abs : y_abs; - float num_f = FPBits(min_abs).get_val(); - float den_f = FPBits(max_abs).get_val(); - double num_d = static_cast<double>(num_f); - double den_d = static_cast<double>(den_f); - - if (LIBC_UNLIKELY(max_abs >= 0x7f80'0000U || num_d == 0.0)) { - if (x_bits.is_nan() || y_bits.is_nan()) { - if (x_bits.is_signaling_nan() || y_bits.is_signaling_nan()) - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - double x_d = static_cast<double>(x); - double y_d = static_cast<double>(y); - size_t x_except = (x_d == 0.0) ? 0 : (x_abs == 0x7f80'0000 ? 2 : 1); - size_t y_except = (y_d == 0.0) ? 0 : (y_abs == 0x7f80'0000 ? 
2 : 1); - - // Exceptional cases: - // EXCEPT[y_except][x_except][x_is_neg] - // with x_except & y_except: - // 0: zero - // 1: finite, non-zero - // 2: infinity - constexpr double EXCEPTS[3][3][2] = { - {{0.0, PI}, {0.0, PI}, {0.0, PI}}, - {{PI_OVER_2, PI_OVER_2}, {0.0, 0.0}, {0.0, PI}}, - {{PI_OVER_2, PI_OVER_2}, - {PI_OVER_2, PI_OVER_2}, - {PI_OVER_4, THREE_PI_OVER_4}}, - }; - - double r = IS_NEG[y_sign] * EXCEPTS[y_except][x_except][x_sign]; - - return static_cast<float>(r); - } - - bool recip = x_abs < y_abs; - double final_sign = IS_NEG[(x_sign != y_sign) != recip]; - fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; - double q_d = num_d / den_d; - - double k_d = fputil::nearest_integer(q_d * 0x1.0p4); - int idx = static_cast<int>(k_d); - double r; - -#ifdef LIBC_MATH_HAS_SMALL_TABLES - double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4); - r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx])); -#else - q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d); - - double p = atan_eval(q_d, idx); - r = final_sign * - fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]); -#endif // LIBC_MATH_HAS_SMALL_TABLES - -#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - return static_cast<float>(r); -#else - constexpr uint32_t LOWER_ERR = 4; - // Mask sticky bits in double precision before rounding to single precision. - constexpr uint32_t MASK = - mask_trailing_ones<uint32_t, fputil::FPBits<double>::SIG_LEN - - FPBits::SIG_LEN - 1>(); - constexpr uint32_t UPPER_ERR = MASK - LOWER_ERR; - - uint32_t r_bits = static_cast<uint32_t>(cpp::bit_cast<uint64_t>(r)) & MASK; - - // Ziv's rounding test. 
- if (LIBC_LIKELY(r_bits > LOWER_ERR && r_bits < UPPER_ERR)) - return static_cast<float>(r); - - return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign, - const_term); -#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return math::atan2f(y, x); } } // namespace LIBC_NAMESPACE_DECL - -#endif diff --git a/libc/src/math/generic/atan2f128.cpp b/libc/src/math/generic/atan2f128.cpp index 8838d94..ec051dd 100644 --- a/libc/src/math/generic/atan2f128.cpp +++ b/libc/src/math/generic/atan2f128.cpp @@ -7,198 +7,12 @@ //===----------------------------------------------------------------------===// #include "src/math/atan2f128.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/dyadic_float.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/integer_literals.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/types.h" -#include "src/__support/math/atan_utils.h" -#include "src/__support/uint128.h" +#include "src/__support/math/atan2f128.h" namespace LIBC_NAMESPACE_DECL { -namespace { - -using Float128 = fputil::DyadicFloat<128>; - -static constexpr Float128 ZERO = {Sign::POS, 0, 0_u128}; -static constexpr Float128 MZERO = {Sign::NEG, 0, 0_u128}; -static constexpr Float128 PI = {Sign::POS, -126, - 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -static constexpr Float128 MPI = {Sign::NEG, -126, - 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -static constexpr Float128 PI_OVER_2 = { - Sign::POS, -127, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -static constexpr Float128 MPI_OVER_2 = { - Sign::NEG, -127, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -static constexpr Float128 PI_OVER_4 = { - Sign::POS, -128, 0xc90fdaa2'2168c234'c4c6628b'80dc1cd1_u128}; -static constexpr Float128 THREE_PI_OVER_4 = { - Sign::POS, -128, 0x96cbe3f9'990e91a7'9394c9e8'a0a5159d_u128}; - -// Adjustment 
for constant term: -// CONST_ADJ[x_sign][y_sign][recip] -static constexpr Float128 CONST_ADJ[2][2][2] = { - {{ZERO, MPI_OVER_2}, {MZERO, MPI_OVER_2}}, - {{MPI, PI_OVER_2}, {MPI, PI_OVER_2}}}; - -} // anonymous namespace - -// There are several range reduction steps we can take for atan2(y, x) as -// follow: - -// * Range reduction 1: signness -// atan2(y, x) will return a number between -PI and PI representing the angle -// forming by the 0x axis and the vector (x, y) on the 0xy-plane. -// In particular, we have that: -// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) -// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) -// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) -// Since atan function is odd, we can use the formula: -// atan(-u) = -atan(u) -// to adjust the above conditions a bit further: -// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) -// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) -// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) -// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) -// Which can be simplified to: -// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 -// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 - -// * Range reduction 2: reciprocal -// Now that the argument inside atan is positive, we can use the formula: -// atan(1/x) = pi/2 - atan(x) -// to make the argument inside atan <= 1 as follow: -// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x -// = sign(y) * (pi/2 - atan( |x|/|y| ) if 0 <= x < |y| -// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x -// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| - -// * Range reduction 3: look up table. -// After the previous two range reduction steps, we reduce the problem to -// compute atan(u) with 0 <= u <= 1, or to be precise: -// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). 
-// An accurate polynomial approximation for the whole [0, 1] input range will -// require a very large degree. To make it more efficient, we reduce the input -// range further by finding an integer idx such that: -// | n/d - idx/64 | <= 1/128. -// In particular, -// idx := round(2^6 * n/d) -// Then for the fast pass, we find a polynomial approximation for: -// atan( n/d ) ~ atan( idx/64 ) + (n/d - idx/64) * Q(n/d - idx/64) -// For the accurate pass, we use the addition formula: -// atan( n/d ) - atan( idx/64 ) = atan( (n/d - idx/64)/(1 + (n*idx)/(64*d)) ) -// = atan( (n - d*(idx/64))/(d + n*(idx/64)) ) -// And for the fast pass, we use degree-13 minimax polynomial to compute the -// RHS: -// atan(u) ~ P(u) = u - c_3 * u^3 + c_5 * u^5 - c_7 * u^7 + c_9 *u^9 - -// - c_11 * u^11 + c_13 * u^13 -// with absolute errors bounded by: -// |atan(u) - P(u)| < 2^-121 -// and relative errors bounded by: -// |(atan(u) - P(u)) / P(u)| < 2^-114. - LLVM_LIBC_FUNCTION(float128, atan2f128, (float128 y, float128 x)) { - using namespace atan_internal; - using FPBits = fputil::FPBits<float128>; - using Float128 = fputil::DyadicFloat<128>; - - FPBits x_bits(x), y_bits(y); - bool x_sign = x_bits.sign().is_neg(); - bool y_sign = y_bits.sign().is_neg(); - x_bits = x_bits.abs(); - y_bits = y_bits.abs(); - UInt128 x_abs = x_bits.uintval(); - UInt128 y_abs = y_bits.uintval(); - bool recip = x_abs < y_abs; - UInt128 min_abs = recip ? x_abs : y_abs; - UInt128 max_abs = !recip ? x_abs : y_abs; - unsigned min_exp = static_cast<unsigned>(min_abs >> FPBits::FRACTION_LEN); - unsigned max_exp = static_cast<unsigned>(max_abs >> FPBits::FRACTION_LEN); - - Float128 num(FPBits(min_abs).get_val()); - Float128 den(FPBits(max_abs).get_val()); - - // Check for exceptional cases, whether inputs are 0, inf, nan, or close to - // overflow, or close to underflow. 
- if (LIBC_UNLIKELY(max_exp >= 0x7fffU || min_exp == 0U)) { - if (x_bits.is_nan() || y_bits.is_nan()) - return FPBits::quiet_nan().get_val(); - unsigned x_except = x == 0 ? 0 : (FPBits(x_abs).is_inf() ? 2 : 1); - unsigned y_except = y == 0 ? 0 : (FPBits(y_abs).is_inf() ? 2 : 1); - - // Exceptional cases: - // EXCEPT[y_except][x_except][x_is_neg] - // with x_except & y_except: - // 0: zero - // 1: finite, non-zero - // 2: infinity - constexpr Float128 EXCEPTS[3][3][2] = { - {{ZERO, PI}, {ZERO, PI}, {ZERO, PI}}, - {{PI_OVER_2, PI_OVER_2}, {ZERO, ZERO}, {ZERO, PI}}, - {{PI_OVER_2, PI_OVER_2}, - {PI_OVER_2, PI_OVER_2}, - {PI_OVER_4, THREE_PI_OVER_4}}, - }; - - if ((x_except != 1) || (y_except != 1)) { - Float128 r = EXCEPTS[y_except][x_except][x_sign]; - if (y_sign) - r.sign = r.sign.negate(); - return static_cast<float128>(r); - } - } - - bool final_sign = ((x_sign != y_sign) != recip); - Float128 const_term = CONST_ADJ[x_sign][y_sign][recip]; - int exp_diff = den.exponent - num.exponent; - // We have the following bound for normalized n and d: - // 2^(-exp_diff - 1) < n/d < 2^(-exp_diff + 1). - if (LIBC_UNLIKELY(exp_diff > FPBits::FRACTION_LEN + 2)) { - if (final_sign) - const_term.sign = const_term.sign.negate(); - return static_cast<float128>(const_term); - } - - // Take 24 leading bits of num and den to convert to float for fast division. - // We also multiply the numerator by 64 using integer addition directly to the - // exponent field. 
- float num_f = - cpp::bit_cast<float>(static_cast<uint32_t>(num.mantissa >> 104) + - (6U << fputil::FPBits<float>::FRACTION_LEN)); - float den_f = cpp::bit_cast<float>( - static_cast<uint32_t>(den.mantissa >> 104) + - (static_cast<uint32_t>(exp_diff) << fputil::FPBits<float>::FRACTION_LEN)); - - float k = fputil::nearest_integer(num_f / den_f); - unsigned idx = static_cast<unsigned>(k); - - // k_f128 = idx / 64 - Float128 k_f128(Sign::POS, -6, Float128::MantissaType(idx)); - - // Range reduction: - // atan(n/d) - atan(k) = atan((n/d - k/64) / (1 + (n/d) * (k/64))) - // = atan((n - d * k/64)) / (d + n * k/64)) - // num_f128 = n - d * k/64 - Float128 num_f128 = fputil::multiply_add(den, -k_f128, num); - // den_f128 = d + n * k/64 - Float128 den_f128 = fputil::multiply_add(num, k_f128, den); - - // q = (n - d * k) / (d + n * k) - Float128 q = fputil::quick_mul(num_f128, fputil::approx_reciprocal(den_f128)); - // p ~ atan(q) - Float128 p = atan_eval(q); - - Float128 r = - fputil::quick_add(const_term, fputil::quick_add(ATAN_I_F128[idx], p)); - if (final_sign) - r.sign = r.sign.negate(); - - return static_cast<float128>(r); + return math::atan2f128(y, x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/atanhf.cpp b/libc/src/math/generic/atanhf.cpp index 602a8f0..81706190 100644 --- a/libc/src/math/generic/atanhf.cpp +++ b/libc/src/math/generic/atanhf.cpp @@ -7,62 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/atanhf.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/math/generic/explogxf.h" +#include "src/__support/math/atanhf.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { - using namespace acoshf_internal; - using FPBits = typename fputil::FPBits<float>; - - FPBits xbits(x); - Sign sign = 
xbits.sign(); - uint32_t x_abs = xbits.abs().uintval(); - - // |x| >= 1.0 - if (LIBC_UNLIKELY(x_abs >= 0x3F80'0000U)) { - if (xbits.is_nan()) { - if (xbits.is_signaling_nan()) { - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - return x; - } - // |x| == 1.0 - if (x_abs == 0x3F80'0000U) { - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_DIVBYZERO); - return FPBits::inf(sign).get_val(); - } else { - fputil::set_errno_if_required(EDOM); - fputil::raise_except_if_required(FE_INVALID); - return FPBits::quiet_nan().get_val(); - } - } - - // |x| < ~0.10 - if (LIBC_UNLIKELY(x_abs <= 0x3dcc'0000U)) { - // |x| <= 2^-26 - if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return static_cast<float>(LIBC_UNLIKELY(x_abs == 0) - ? x - : (x + 0x1.5555555555555p-2 * x * x * x)); - } - - double xdbl = x; - double x2 = xdbl * xdbl; - // Pure Taylor series. - double pe = fputil::polyeval(x2, 0.0, 0x1.5555555555555p-2, - 0x1.999999999999ap-3, 0x1.2492492492492p-3, - 0x1.c71c71c71c71cp-4, 0x1.745d1745d1746p-4); - return static_cast<float>(fputil::multiply_add(xdbl, pe, xdbl)); - } - double xdbl = x; - return static_cast<float>(0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0))); -} +LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { return math::atanhf(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/sched/linux/CMakeLists.txt b/libc/src/sched/linux/CMakeLists.txt index bb50002..ceb755f 100644 --- a/libc/src/sched/linux/CMakeLists.txt +++ b/libc/src/sched/linux/CMakeLists.txt @@ -5,7 +5,6 @@ add_entrypoint_object( HDRS ../getcpu.h DEPENDS - libc.include.sched libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -69,9 +68,10 @@ add_entrypoint_object( HDRS ../sched_setparam.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.types.struct_sched_param libc.include.sys_syscall libc.include.time - libc.include.sched libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -83,9 +83,10 @@ add_entrypoint_object( HDRS 
../sched_getparam.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.types.struct_sched_param libc.include.sys_syscall libc.include.time - libc.include.sched libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -97,9 +98,10 @@ add_entrypoint_object( HDRS ../sched_setscheduler.h DEPENDS + libc.hdr.types.pid_t + libc.hdr.types.struct_sched_param libc.include.sys_syscall libc.include.time - libc.include.sched libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/sched/sched_getparam.h b/libc/src/sched/sched_getparam.h index e1b2365..00defdf 100644 --- a/libc/src/sched/sched_getparam.h +++ b/libc/src/sched/sched_getparam.h @@ -10,7 +10,9 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_GETPARAM_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/pid_t.h" +#include "hdr/types/struct_sched_param.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_setparam.h b/libc/src/sched/sched_setparam.h index e4691a7..5a69b09 100644 --- a/libc/src/sched/sched_setparam.h +++ b/libc/src/sched/sched_setparam.h @@ -10,7 +10,9 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_SETPARAM_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/pid_t.h" +#include "hdr/types/struct_sched_param.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/sched/sched_setscheduler.h b/libc/src/sched/sched_setscheduler.h index e745002..c5cb148 100644 --- a/libc/src/sched/sched_setscheduler.h +++ b/libc/src/sched/sched_setscheduler.h @@ -10,7 +10,9 @@ #define LLVM_LIBC_SRC_SCHED_SCHED_SETSCHEDULER_H #include "src/__support/macros/config.h" -#include <sched.h> + +#include "hdr/types/pid_t.h" +#include "hdr/types/struct_sched_param.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index ed4f0aa..85513a6 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -27,17 +27,22 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, wchar_t *tok_start = str; while (*tok_start != 
L'\0' && internal::wcschr(delims, *tok_start)) ++tok_start; + if (*tok_start == L'\0') { + *context = nullptr; + return nullptr; + } wchar_t *tok_end = tok_start; while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end)) ++tok_end; - if (*tok_end != L'\0') { + if (*tok_end == L'\0') { + *context = nullptr; + } else { *tok_end = L'\0'; - ++tok_end; + *context = tok_end + 1; } - *context = tok_end; - return *tok_start == L'\0' ? nullptr : tok_start; + return tok_start; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index 4ed32d4..34236ad 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -20,8 +20,11 @@ add_fp_unittest( libc.src.__support.math.asinhf16 libc.src.__support.math.atan libc.src.__support.math.atan2 + libc.src.__support.math.atan2f + libc.src.__support.math.atan2f128 libc.src.__support.math.atanf libc.src.__support.math.atanf16 + libc.src.__support.math.atanhf libc.src.__support.math.erff libc.src.__support.math.exp libc.src.__support.math.exp10 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index cd72df4..ffe1a24 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -45,7 +45,9 @@ TEST(LlvmLibcSharedMathTest, AllFloat) { EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::acoshf(1.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinf(0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::asinhf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atan2f(0.0f, 0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanf(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::atanhf(0.0f)); EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::erff(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp10f(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f)); @@ -72,6 +74,8 @@ TEST(LlvmLibcSharedMathTest, AllDouble) { TEST(LlvmLibcSharedMathTest, 
AllFloat128) { int exponent; + EXPECT_FP_EQ(float128(0x0p+0), + LIBC_NAMESPACE::shared::atan2f128(float128(0.0), float128(0.0))); EXPECT_FP_EQ_ALL_ROUNDING(float128(0.75), LIBC_NAMESPACE::shared::frexpf128( float128(24), &exponent)); EXPECT_EQ(exponent, 5); diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt index f6151d0..362c526 100644 --- a/libc/test/src/sched/CMakeLists.txt +++ b/libc/test/src/sched/CMakeLists.txt @@ -48,7 +48,6 @@ add_libc_unittest( SRCS getcpu_test.cpp DEPENDS - libc.include.sched libc.src.errno.errno libc.src.sched.getcpu libc.test.UnitTest.ErrnoCheckingTest @@ -61,7 +60,8 @@ add_libc_unittest( SRCS param_and_scheduler_test.cpp DEPENDS - libc.include.sched + libc.hdr.sched_macros + libc.hdr.types.struct_sched_param libc.src.errno.errno libc.src.sched.sched_getscheduler libc.src.sched.sched_setscheduler @@ -79,6 +79,7 @@ add_libc_unittest( SRCS sched_rr_get_interval_test.cpp DEPENDS + libc.hdr.sched_macros libc.hdr.types.struct_timespec libc.src.errno.errno libc.src.sched.sched_getscheduler diff --git a/libc/test/src/sched/param_and_scheduler_test.cpp b/libc/test/src/sched/param_and_scheduler_test.cpp index 4f2b6e4..b8ee123 100644 --- a/libc/test/src/sched/param_and_scheduler_test.cpp +++ b/libc/test/src/sched/param_and_scheduler_test.cpp @@ -16,7 +16,8 @@ #include "src/unistd/getuid.h" #include "test/UnitTest/Test.h" -#include <sched.h> +#include "hdr/sched_macros.h" +#include "hdr/types/struct_sched_param.h" // We Test: // SCHED_OTHER, SCHED_FIFO, SCHED_RR diff --git a/libc/test/src/sched/sched_rr_get_interval_test.cpp b/libc/test/src/sched/sched_rr_get_interval_test.cpp index 272cf86..e5dc4e3 100644 --- a/libc/test/src/sched/sched_rr_get_interval_test.cpp +++ b/libc/test/src/sched/sched_rr_get_interval_test.cpp @@ -14,6 +14,7 @@ #include "src/unistd/getuid.h" #include "test/UnitTest/Test.h" +#include "hdr/sched_macros.h" #include "hdr/types/struct_timespec.h" TEST(LlvmLibcSchedRRGetIntervalTest, SmokeTest) 
{ diff --git a/libc/test/src/wchar/wcstok_test.cpp b/libc/test/src/wchar/wcstok_test.cpp index 7106e9f..3bb1014 100644 --- a/libc/test/src/wchar/wcstok_test.cpp +++ b/libc/test/src/wchar/wcstok_test.cpp @@ -19,6 +19,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { // Another call to ensure that 'reserve' is not in a bad state. ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr); + // Subsequent searches still return nullptr. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr); } { // Empty source and single character delimiter string. wchar_t empty[] = L""; @@ -27,6 +29,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { // Another call to ensure that 'reserve' is not in a bad state. ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); + // Subsequent searches still return nullptr. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); } { // Same character source and delimiter string. wchar_t single[] = L"_"; @@ -35,6 +39,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { // Another call to ensure that 'reserve' is not in a bad state. ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); + // Subsequent searches still return nullptr. + ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr); } { // Multiple character source and single character delimiter string. wchar_t multiple[] = L"1,2"; @@ -51,6 +57,8 @@ TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) { ASSERT_TRUE(tok[2] == L'2'); ASSERT_TRUE(tok[3] == L'\0'); ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr); + // Subsequent searches still return nullptr. 
+ ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr); } } diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 328dfcf..5b95edc 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -42,6 +42,10 @@ set( LIBCLC_TARGETS_TO_BUILD "all" option( ENABLE_RUNTIME_SUBNORMAL "Enable runtime linking of subnormal support." OFF ) +option( + LIBCLC_USE_SPIRV_BACKEND "Build SPIR-V targets with the SPIR-V backend." OFF +) + # Top level target used to build all Libclc libraries. add_custom_target( libclc ALL ) @@ -115,14 +119,17 @@ foreach( tool IN ITEMS clang opt llvm-as llvm-link ) endif() endforeach() -# llvm-spirv is an optional dependency, used to build spirv-* targets. -# It may be provided in-tree or externally. -if( TARGET llvm-spirv ) - get_host_tool_path( llvm-spirv LLVM_SPIRV llvm-spirv_exe llvm-spirv_target ) -else() - find_program( LLVM_SPIRV llvm-spirv HINTS ${LLVM_TOOLS_BINARY_DIR} ) - set( llvm-spirv_exe "${LLVM_SPIRV}" ) - set( llvm-spirv_target ) +if( NOT LIBCLC_USE_SPIRV_BACKEND ) + # llvm-spirv is an optional dependency, used to build spirv-* targets when + # the SPIR-V backend hasn't been requested. It may be provided in-tree or + # externally. + if( TARGET llvm-spirv ) + get_host_tool_path( llvm-spirv LLVM_SPIRV llvm-spirv_exe llvm-spirv_target ) + else() + find_program( LLVM_SPIRV llvm-spirv HINTS ${LLVM_TOOLS_BINARY_DIR} ) + set( llvm-spirv_exe "${LLVM_SPIRV}" ) + set( llvm-spirv_target ) + endif() endif() # List of all targets. Note that some are added dynamically below. @@ -138,22 +145,24 @@ set( LIBCLC_TARGETS_ALL nvptx64--nvidiacl ) -# mesa3d environment is only available since LLVM 4.0 +# The mesa3d environment is only available since LLVM 4.0 if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 4.0.0 ) list( APPEND LIBCLC_TARGETS_ALL amdgcn-mesa-mesa3d ) endif() -# spirv-mesa3d and spirv64-mesa3d targets can only be built with the (optional) -# llvm-spirv external tool. 
-if( llvm-spirv_exe ) - list( APPEND LIBCLC_TARGETS_ALL spirv-mesa3d- spirv64-mesa3d- ) +# The spirv-mesa3d and spirv64-mesa3d targets are optional and can be built +# with either the LLVM SPIR-V backend or the external llvm-spirv tool. +if( LIBCLC_USE_SPIRV_BACKEND OR llvm-spirv_exe ) + list( APPEND LIBCLC_TARGETS_ALL spirv-mesa3d- spirv64-mesa3d- ) endif() # Verify that the user hasn't requested mesa3d targets without an available # llvm-spirv tool. -if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD ) - if( NOT llvm-spirv_exe ) - message( FATAL_ERROR "SPIR-V targets requested, but spirv-tools is not installed" ) +if( spirv-mesa3d- IN_LIST LIBCLC_TARGETS_TO_BUILD + OR spirv64-mesa3d- IN_LIST LIBCLC_TARGETS_TO_BUILD ) + if( NOT LIBCLC_USE_SPIRV_BACKEND AND NOT llvm-spirv_exe ) + message( FATAL_ERROR "SPIR-V targets requested, but spirv-tools is not " + "installed and the SPIR-V backend has not been requested." ) endif() endif() diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 9b0e5d9..47185586 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -164,7 +164,9 @@ function(get_libclc_device_info) list( GET TRIPLE 0 ARCH ) # Some targets don't have a specific device architecture to target - if( ARG_DEVICE STREQUAL none OR ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 ) + if( ARG_DEVICE STREQUAL none + OR ((ARCH STREQUAL spirv OR ARCH STREQUAL spirv64) + AND NOT LIBCLC_USE_SPIRV_BACKEND) ) set( cpu ) set( arch_suffix "${ARG_TRIPLE}" ) else() @@ -182,7 +184,11 @@ function(get_libclc_device_info) # Some libclc targets are not real clang triples: return their canonical # triples. 
- if( ARCH STREQUAL spirv OR ARCH STREQUAL clspv ) + if( ARCH STREQUAL spirv AND LIBCLC_USE_SPIRV_BACKEND ) + set( ARG_TRIPLE "spirv32--" ) + elseif( ARCH STREQUAL spirv64 AND LIBCLC_USE_SPIRV_BACKEND ) + set( ARG_TRIPLE "spirv64--" ) + elseif( ARCH STREQUAL spirv OR ARCH STREQUAL clspv ) set( ARG_TRIPLE "spir--" ) elseif( ARCH STREQUAL spirv64 OR ARCH STREQUAL clspv64 ) set( ARG_TRIPLE "spir64--" ) @@ -363,10 +369,17 @@ function(add_libclc_builtin_set) if( ARG_ARCH STREQUAL spirv OR ARG_ARCH STREQUAL spirv64 ) set( obj_suffix ${ARG_ARCH_SUFFIX}.spv ) set( libclc_builtins_lib ${LIBCLC_OUTPUT_LIBRARY_DIR}/${obj_suffix} ) - add_custom_command( OUTPUT ${libclc_builtins_lib} - COMMAND ${llvm-spirv_exe} ${spvflags} -o ${libclc_builtins_lib} ${builtins_link_lib} - DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) + if ( LIBCLC_USE_SPIRV_BACKEND ) + add_custom_command( OUTPUT ${libclc_builtins_lib} + COMMAND ${clang_exe} --target=${ARG_TRIPLE} -x ir -o ${libclc_builtins_lib} ${builtins_link_lib} + DEPENDS ${clang_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + else() + add_custom_command( OUTPUT ${libclc_builtins_lib} + COMMAND ${llvm-spirv_exe} ${spvflags} -o ${libclc_builtins_lib} ${builtins_link_lib} + DEPENDS ${llvm-spirv_target} ${builtins_link_lib} ${builtins_link_lib_tgt} + ) + endif() else() # Non-SPIR-V targets add an extra step to optimize the bytecode set( builtins_opt_lib_tgt builtins.opt.${ARG_ARCH_SUFFIX} ) diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 6180572..a36848e 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -432,7 +432,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_constexpr_queue`` ``202502L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_constrained_equality`` *unimplemented* + 
``__cpp_lib_constrained_equality`` ``202411L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_copyable_function`` *unimplemented* ---------------------------------------------------------- ----------------- diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 74bfa97..91123ff 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -53,6 +53,8 @@ Implemented Papers - P2711R1: Making multi-param constructors of ``views`` ``explicit`` (`Github <https://github.com/llvm/llvm-project/issues/105252>`__) - P2770R0: Stashing stashing ``iterators`` for proper flattening (`Github <https://github.com/llvm/llvm-project/issues/105250>`__) - P2655R3: ``common_reference_t`` of ``reference_wrapper`` Should Be a Reference Type (`Github <https://github.com/llvm/llvm-project/issues/105260>`__) +- P2944R3: Comparisons for ``reference_wrapper`` (`Github <https://github.com/llvm/llvm-project/issues/105424>`__) +- P3379R0: Constrain ``std::expected equality`` operators (`Github <https://github.com/llvm/llvm-project/issues/118135>`__) Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index e4fa07d..f1d8e9a 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -60,7 +60,7 @@ "`P1642R11 <https://wg21.link/P1642R11>`__","Freestanding ``[utilities]``, ``[ranges]``, and ``[iterators]``","2022-07 (Virtual)","","","" "`P1899R3 <https://wg21.link/P1899R3>`__","``stride_view``","2022-07 (Virtual)","","","" "`P2093R14 <https://wg21.link/P2093R14>`__","Formatted output","2022-07 (Virtual)","|Complete|","18","" -"`P2165R4 <https://wg21.link/P2165R4>`__","Compatibility between ``tuple``, ``pair`` and ``tuple-like`` objects","2022-07 (Virtual)","|Partial|","","Only the part for ``zip_view`` is implemented." 
+"`P2165R4 <https://wg21.link/P2165R4>`__","Compatibility between ``tuple``, ``pair`` and ``tuple-like`` objects","2022-07 (Virtual)","|Partial|","","Changes of ``tuple``, ``adjacent_view``, and ``cartesian_product_view`` are not yet implemented." "`P2278R4 <https://wg21.link/P2278R4>`__","``cbegin`` should always return a constant iterator","2022-07 (Virtual)","","","" "`P2286R8 <https://wg21.link/P2286R8>`__","Formatting Ranges","2022-07 (Virtual)","|Complete|","16","" "`P2291R3 <https://wg21.link/P2291R3>`__","Add Constexpr Modifiers to Functions ``to_chars`` and ``from_chars`` for Integral Types in ``<charconv>`` Header","2022-07 (Virtual)","|Complete|","16","" diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 6fcb2f3..c622512 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -149,4 +149,5 @@ "`LWG3343 <https://wg21.link/LWG3343>`__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Adopted Yet","|Complete|","16","" "`LWG4139 <https://wg21.link/LWG4139>`__","§[time.zone.leap] recursive constraint in ``<=>``","Not Adopted Yet","|Complete|","20","" "`LWG3456 <https://wg21.link/LWG3456>`__","Pattern used by ``std::from_chars`` is underspecified (option B)","Not Adopted Yet","|Complete|","20","" +"`LWG3882 <https://wg21.link/LWG3882>`__","``tuple`` relational operators have confused friendships","Not Adopted Yet","|Complete|","21","The comparison operators are constrained harder than the proposed resolution. libstdc++ and MSVC STL do the same." 
"","","","","","" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index febb0c1..e8b0c95 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -59,7 +59,7 @@ "`P2248R8 <https://wg21.link/P2248R8>`__","Enabling list-initialization for algorithms","2024-03 (Tokyo)","","","" "`P2810R4 <https://wg21.link/P2810R4>`__","``is_debugger_present`` ``is_replaceable``","2024-03 (Tokyo)","","","" "`P1068R11 <https://wg21.link/P1068R11>`__","Vector API for random number generation","2024-03 (Tokyo)","","","" -"`P2944R3 <https://wg21.link/P2944R3>`__","Comparisons for ``reference_wrapper``","2024-03 (Tokyo)","|Partial|","","The changes to ``tuple``'s equality overload from P2165R4 are not yet implemented." +"`P2944R3 <https://wg21.link/P2944R3>`__","Comparisons for ``reference_wrapper``","2024-03 (Tokyo)","|Complete|","21","" "`P2642R6 <https://wg21.link/P2642R6>`__","Padded ``mdspan`` layouts","2024-03 (Tokyo)","","","" "`P3029R1 <https://wg21.link/P3029R1>`__","Better ``mdspan``'s CTAD","2024-03 (Tokyo)","|Complete|","19","" "","","","","","" diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index 03f50d9..dacc152 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -1709,41 +1709,45 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D template <class _Tp, class _Hash, class _Equal, class _Alloc> template <bool _UniqueKeys> -void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) { - __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); - __bucket_list_.reset(__nbc > 0 ? 
__pointer_alloc_traits::allocate(__npa, __nbc) : nullptr); - __bucket_list_.get_deleter().size() = __nbc; - if (__nbc > 0) { - for (size_type __i = 0; __i < __nbc; ++__i) - __bucket_list_[__i] = nullptr; - __next_pointer __pp = __first_node_.__ptr(); - __next_pointer __cp = __pp->__next_; - if (__cp != nullptr) { - size_type __chash = std::__constrain_hash(__cp->__hash(), __nbc); - __bucket_list_[__chash] = __pp; - size_type __phash = __chash; - for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) { - __chash = std::__constrain_hash(__cp->__hash(), __nbc); - if (__chash == __phash) - __pp = __cp; - else { - if (__bucket_list_[__chash] == nullptr) { - __bucket_list_[__chash] = __pp; - __pp = __cp; - __phash = __chash; - } else { - __next_pointer __np = __cp; - if _LIBCPP_CONSTEXPR_SINCE_CXX17 (!_UniqueKeys) { - for (; __np->__next_ != nullptr && - key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value()); - __np = __np->__next_) - ; - } - __pp->__next_ = __np->__next_; - __np->__next_ = __bucket_list_[__chash]->__next_; - __bucket_list_[__chash]->__next_ = __cp; - } +void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __bucket_count) { + __pointer_allocator& __ptr_alloc = __bucket_list_.get_deleter().__alloc(); + __bucket_list_.reset(__bucket_count > 0 ? 
__pointer_alloc_traits::allocate(__ptr_alloc, __bucket_count) : nullptr); + __bucket_list_.get_deleter().size() = __bucket_count; + + if (__bucket_count == 0) + return; + + for (size_type __i = 0; __i < __bucket_count; ++__i) + __bucket_list_[__i] = nullptr; + __next_pointer __pp = __first_node_.__ptr(); + __next_pointer __cp = __pp->__next_; + + if (!__cp) + return; + + size_type __chash = std::__constrain_hash(__cp->__hash(), __bucket_count); + __bucket_list_[__chash] = __pp; + size_type __phash = __chash; + for (__pp = __cp, void(), __cp = __cp->__next_; __cp != nullptr; __cp = __pp->__next_) { + __chash = std::__constrain_hash(__cp->__hash(), __bucket_count); + if (__chash == __phash) + __pp = __cp; + else { + if (__bucket_list_[__chash] == nullptr) { + __bucket_list_[__chash] = __pp; + __pp = __cp; + __phash = __chash; + } else { + __next_pointer __np = __cp; + if _LIBCPP_CONSTEXPR (!_UniqueKeys) { + for (; __np->__next_ != nullptr && + key_eq()(__cp->__upcast()->__get_value(), __np->__next_->__upcast()->__get_value()); + __np = __np->__next_) + ; } + __pp->__next_ = __np->__next_; + __np->__next_ = __bucket_list_[__chash]->__next_; + __bucket_list_[__chash]->__next_ = __cp; } } } diff --git a/libcxx/include/tuple b/libcxx/include/tuple index 1623702..23a391d 100644 --- a/libcxx/include/tuple +++ b/libcxx/include/tuple @@ -106,6 +106,11 @@ public: void swap(tuple&) noexcept(AND(swap(declval<T&>(), declval<T&>())...)); // constexpr in C++20 constexpr void swap(const tuple&) const noexcept(see-below); // C++23 + + template<tuple-like UTuple> + friend constexpr bool operator==(const tuple& t, const UTuple& u); // C++23 + template<tuple-like UTuple> + friend constexpr auto operator<=>(const tuple& t, const UTuple& u); // C++23 }; @@ -220,6 +225,7 @@ template <class... 
Types> # include <__config> # include <__cstddef/size_t.h> # include <__fwd/array.h> +# include <__fwd/get.h> # include <__fwd/pair.h> # include <__fwd/tuple.h> # include <__memory/allocator_arg_t.h> @@ -229,6 +235,7 @@ template <class... Types> # include <__tuple/make_tuple_types.h> # include <__tuple/sfinae_helpers.h> # include <__tuple/tuple_element.h> +# include <__tuple/tuple_like.h> # include <__tuple/tuple_like_ext.h> # include <__tuple/tuple_size.h> # include <__tuple/tuple_types.h> @@ -287,6 +294,68 @@ _LIBCPP_BEGIN_NAMESPACE_STD # ifndef _LIBCPP_CXX03_LANG +template <size_t _Ip, class _Tp, class _Up> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __tuple_compare_equal(const _Tp& __x, const _Up& __y) { + if constexpr (_Ip == 0) + return true; + else + return std::__tuple_compare_equal<_Ip - 1>(__x, __y) && std::get<_Ip - 1>(__x) == std::get<_Ip - 1>(__y); +} + +# if _LIBCPP_STD_VER >= 26 +template <class _Tp, class _Up, class _IndexSeq = make_index_sequence<tuple_size_v<_Tp>>> +inline constexpr bool __can_tuple_compare_equal = false; + +// TODO(LLVM 22): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends +// because the resolution of CWG2369 landed in LLVM-21. +template <class _Tp, class _Up, size_t... _Is> + requires(tuple_size_v<_Tp> == tuple_size_v<_Up>) +inline constexpr bool __can_tuple_compare_equal<_Tp, _Up, index_sequence<_Is...>> = + __all<requires(const tuple_element_t<_Is, _Tp>& __t, const tuple_element_t<_Is, _Up>& __u) { + { __t == __u } -> __boolean_testable; + }...>::value; +# endif // _LIBCPP_STD_VER >= 26 + +# if _LIBCPP_STD_VER >= 20 +template <class _Ret, class _Tp, class _Up, size_t... 
_Is> +_LIBCPP_HIDE_FROM_ABI constexpr _Ret __tuple_compare_three_way(const _Tp& __x, const _Up& __y, index_sequence<_Is...>) { + _Ret __result = strong_ordering::equal; + static_cast<void>( + ((__result = std::__synth_three_way(std::get<_Is>(__x), std::get<_Is>(__y)), __result != 0) || ...)); + return __result; +} +# endif // _LIBCPP_STD_VER >= 20 + +# if _LIBCPP_STD_VER >= 23 +template <class> +inline constexpr bool __is_tuple_v = false; + +template <class... _Tp> +inline constexpr bool __is_tuple_v<tuple<_Tp...>> = true; + +template <class _Tp> +concept __tuple_like_no_tuple = __tuple_like<_Tp> && !__is_tuple_v<_Tp>; + +template <class _Tp, class _Up, class _IndexSeq> +struct __tuple_common_comparison_category_impl {}; + +// TODO(LLVM 22): Remove `tuple_size_v<_Tp> == tuple_size_v<_Up>` here once once LLVM-20 support ends +// because the resolution of CWG2369 landed in LLVM-21. +template <class _Tp, class _Up, size_t... _Is> + requires(tuple_size_v<_Tp> == tuple_size_v<_Up>) && requires { + typename common_comparison_category_t< + __synth_three_way_result<tuple_element_t<_Is, _Tp>, tuple_element_t<_Is, _Up>>...>; + } +struct __tuple_common_comparison_category_impl<_Tp, _Up, index_sequence<_Is...>> { + using type _LIBCPP_NODEBUG = + common_comparison_category_t<__synth_three_way_result<tuple_element_t<_Is, _Tp>, tuple_element_t<_Is, _Up>>...>; +}; + +template <__tuple_like _Tp, __tuple_like _Up> +using __tuple_common_comparison_category _LIBCPP_NODEBUG = + __tuple_common_comparison_category_impl<_Tp, _Up, make_index_sequence<tuple_size_v<_Tp>>>::type; +# endif // _LIBCPP_STD_VER >= 23 + // __tuple_leaf template <size_t _Ip, class _Hp, bool = is_empty<_Hp>::value && !__libcpp_is_final<_Hp>::value > @@ -448,33 +517,28 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __swallow(_Tp&&...) _NO template <class _Indx, class... _Tp> struct __tuple_impl; +struct __forward_args {}; +struct __value_init {}; + template <size_t... _Indx, class... 
_Tp> struct _LIBCPP_DECLSPEC_EMPTY_BASES __tuple_impl<__index_sequence<_Indx...>, _Tp...> : public __tuple_leaf<_Indx, _Tp>... { _LIBCPP_HIDE_FROM_ABI constexpr __tuple_impl() noexcept( __all<is_nothrow_default_constructible<_Tp>::value...>::value) {} - template <size_t... _Uf, class... _Tf, size_t... _Ul, class... _Tl, class... _Up> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl( - __index_sequence<_Uf...>, - __tuple_types<_Tf...>, - __index_sequence<_Ul...>, - __tuple_types<_Tl...>, - _Up&&... __u) noexcept(__all<is_nothrow_constructible<_Tf, _Up>::value...>::value && - __all<is_nothrow_default_constructible<_Tl>::value...>::value) - : __tuple_leaf<_Uf, _Tf>(std::forward<_Up>(__u))..., __tuple_leaf<_Ul, _Tl>()... {} - - template <class _Alloc, size_t... _Uf, class... _Tf, size_t... _Ul, class... _Tl, class... _Up> + template <class... _Args> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(__forward_args, _Args&&... __args) + : __tuple_leaf<_Indx, _Tp>(std::forward<_Args>(__args))... {} + + template <class _Alloc> + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl(allocator_arg_t, const _Alloc& __alloc, __value_init) + : __tuple_leaf<_Indx, _Tp>(__uses_alloc_ctor<_Tp, _Alloc>(), __alloc)... {} + + template <class _Alloc, class... _Args> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __tuple_impl( - allocator_arg_t, - const _Alloc& __a, - __index_sequence<_Uf...>, - __tuple_types<_Tf...>, - __index_sequence<_Ul...>, - __tuple_types<_Tl...>, - _Up&&... __u) - : __tuple_leaf<_Uf, _Tf>(__uses_alloc_ctor<_Tf, _Alloc, _Up>(), __a, std::forward<_Up>(__u))..., - __tuple_leaf<_Ul, _Tl>(__uses_alloc_ctor<_Tl, _Alloc>(), __a)... {} + allocator_arg_t, const _Alloc& __alloc, __forward_args, _Args&&... __args) + : __tuple_leaf<_Indx, _Tp>(__uses_alloc_ctor<_Tp, _Alloc, _Args>(), __alloc, std::forward<_Args>(__args))... 
{} template <class _Tuple, __enable_if_t<__tuple_constructible<_Tuple, tuple<_Tp...> >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __tuple_impl(_Tuple&& __t) noexcept( @@ -559,12 +623,7 @@ public: __enable_if_t< _And< _IsDefault<_Tp>... >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, _IsImpDefault<_Tp>...> >::value) tuple(allocator_arg_t, _Alloc const& __a) - : __base_(allocator_arg_t(), - __a, - __index_sequence<>(), - __tuple_types<>(), - __make_index_sequence<sizeof...(_Tp)>(), - __tuple_types<_Tp...>()) {} + : __base_(allocator_arg_t(), __a, __value_init{}) {} // tuple(const T&...) constructors (including allocator_arg_t variants) template <template <class...> class _And = _And, @@ -572,11 +631,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<const _Tp&, _Tp>...> >::value) tuple(const _Tp&... __t) noexcept(_And<is_nothrow_copy_constructible<_Tp>...>::value) - : __base_(__make_index_sequence<sizeof...(_Tp)>(), - __tuple_types<_Tp...>(), - __index_sequence<>(), - __tuple_types<>(), - __t...) {} + : __base_(__forward_args{}, __t...) {} template <class _Alloc, template <class...> class _And = _And, @@ -584,13 +639,7 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<const _Tp&, _Tp>...> >::value) tuple(allocator_arg_t, const _Alloc& __a, const _Tp&... __t) - : __base_(allocator_arg_t(), - __a, - __make_index_sequence<sizeof...(_Tp)>(), - __tuple_types<_Tp...>(), - __index_sequence<>(), - __tuple_types<>(), - __t...) {} + : __base_(allocator_arg_t(), __a, __forward_args{}, __t...) {} // tuple(U&& ...) constructors (including allocator_arg_t variants) template <class... _Up> @@ -609,11 +658,7 @@ public: int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(_Up&&... 
__u) noexcept(_And<is_nothrow_constructible<_Tp, _Up>...>::value) - : __base_(__make_index_sequence<sizeof...(_Up)>(), - __tuple_types<_Tp...>(), - __index_sequence<>(), - __tuple_types<>(), - std::forward<_Up>(__u)...) {} + : __base_(__forward_args{}, std::forward<_Up>(__u)...) {} template <class _Alloc, class... _Up, @@ -621,13 +666,7 @@ public: int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 explicit(_Not<_Lazy<_And, is_convertible<_Up, _Tp>...> >::value) tuple(allocator_arg_t, const _Alloc& __a, _Up&&... __u) - : __base_(allocator_arg_t(), - __a, - __make_index_sequence<sizeof...(_Up)>(), - __tuple_types<_Tp...>(), - __index_sequence<>(), - __tuple_types<>(), - std::forward<_Up>(__u)...) {} + : __base_(allocator_arg_t(), __a, __forward_args{}, std::forward<_Up>(__u)...) {} // Copy and move constructors (including the allocator_arg_t variants) tuple(const tuple&) = default; @@ -986,7 +1025,24 @@ public: noexcept(__all<is_nothrow_swappable_v<const _Tp&>...>::value) { __base_.swap(__t.__base_); } -# endif // _LIBCPP_STD_VER >= 23 + + template <__tuple_like_no_tuple _UTuple> +# if _LIBCPP_STD_VER >= 26 + requires __can_tuple_compare_equal<tuple, _UTuple> && (sizeof...(_Tp) == tuple_size_v<_UTuple>) +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const tuple& __x, const _UTuple& __y) { + static_assert(sizeof...(_Tp) == tuple_size_v<_UTuple>, "Can't compare tuple-like values of different sizes"); + return std::__tuple_compare_equal<sizeof...(_Tp)>(__x, __y); + } + + template <__tuple_like_no_tuple _UTuple> + requires(sizeof...(_Tp) == tuple_size_v<_UTuple>) + _LIBCPP_HIDE_FROM_ABI friend constexpr __tuple_common_comparison_category<tuple, _UTuple> + operator<=>(const tuple& __x, const _UTuple& __y) { + return std::__tuple_compare_three_way<__tuple_common_comparison_category<tuple, _UTuple>>( + __x, __y, index_sequence_for<_Tp...>{}); + } +# endif // _LIBCPP_STD_VER >= 23 }; _LIBCPP_DIAGNOSTIC_PUSH @@ -1008,6 
+1064,21 @@ public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(tuple&) _NOEXCEPT {} # if _LIBCPP_STD_VER >= 23 _LIBCPP_HIDE_FROM_ABI constexpr void swap(const tuple&) const noexcept {} + + template <__tuple_like_no_tuple _UTuple> +# if _LIBCPP_STD_VER >= 26 + requires(tuple_size_v<_UTuple> == 0) +# endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const tuple&, const _UTuple&) { + static_assert(tuple_size_v<_UTuple> == 0, "Can't compare tuple-like values of different sizes"); + return true; + } + + template <__tuple_like_no_tuple _UTuple> + requires(tuple_size_v<_UTuple> == 0) + _LIBCPP_HIDE_FROM_ABI friend constexpr strong_ordering operator<=>(const tuple&, const _UTuple&) { + return strong_ordering::equal; + } # endif }; _LIBCPP_DIAGNOSTIC_POP @@ -1126,22 +1197,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 tuple<_Tp&&...> forwa return tuple<_Tp&&...>(std::forward<_Tp>(__t)...); } -template <size_t _Ip> -struct __tuple_equal { - template <class _Tp, class _Up> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool operator()(const _Tp& __x, const _Up& __y) { - return __tuple_equal<_Ip - 1>()(__x, __y) && std::get<_Ip - 1>(__x) == std::get<_Ip - 1>(__y); - } -}; - -template <> -struct __tuple_equal<0> { - template <class _Tp, class _Up> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool operator()(const _Tp&, const _Up&) { - return true; - } -}; - template <class... _Tp, class... _Up> # if _LIBCPP_STD_VER >= 26 requires(__all<requires(const _Tp& __t, const _Up& __u) { @@ -1151,27 +1206,19 @@ template <class... _Tp, class... 
_Up> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool operator==(const tuple<_Tp...>& __x, const tuple<_Up...>& __y) { static_assert(sizeof...(_Tp) == sizeof...(_Up), "Can't compare tuples of different sizes"); - return __tuple_equal<sizeof...(_Tp)>()(__x, __y); + return std::__tuple_compare_equal<sizeof...(_Tp)>(__x, __y); } # if _LIBCPP_STD_VER >= 20 // operator<=> -template <class... _Tp, class... _Up, size_t... _Is> -_LIBCPP_HIDE_FROM_ABI constexpr auto -__tuple_compare_three_way(const tuple<_Tp...>& __x, const tuple<_Up...>& __y, index_sequence<_Is...>) { - common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...> __result = strong_ordering::equal; - static_cast<void>( - ((__result = std::__synth_three_way(std::get<_Is>(__x), std::get<_Is>(__y)), __result != 0) || ...)); - return __result; -} - template <class... _Tp, class... _Up> requires(sizeof...(_Tp) == sizeof...(_Up)) _LIBCPP_HIDE_FROM_ABI constexpr common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...> operator<=>(const tuple<_Tp...>& __x, const tuple<_Up...>& __y) { - return std::__tuple_compare_three_way(__x, __y, index_sequence_for<_Tp...>{}); + return std::__tuple_compare_three_way<common_comparison_category_t<__synth_three_way_result<_Tp, _Up>...>>( + __x, __y, index_sequence_for<_Tp...>{}); } # else // _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/version b/libcxx/include/version index d98049b..aae9277 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -555,7 +555,7 @@ __cpp_lib_void_t 201411L <type_traits> # define __cpp_lib_constexpr_new 202406L # endif # define __cpp_lib_constexpr_queue 202502L -// # define __cpp_lib_constrained_equality 202411L +# define __cpp_lib_constrained_equality 202411L // # define __cpp_lib_copyable_function 202306L // # define __cpp_lib_debugging 202311L // # define __cpp_lib_default_template_type_for_algorithm_values 202403L diff --git 
a/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp index 74cf85e..4ec6c46 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp @@ -93,17 +93,11 @@ #elif TEST_STD_VER > 23 -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" -# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # ifndef __cpp_lib_expected diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index 148a6db..ccdb1a8 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -119,17 +119,11 @@ #elif TEST_STD_VER > 23 -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error 
"__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" -# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # if !defined(_LIBCPP_VERSION) diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp index b10441f..ceb338d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp @@ -270,17 +270,11 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++26" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # ifndef __cpp_lib_make_from_tuple diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp index 02e7feb..b882a5d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp @@ -401,17 +401,11 @@ # error "__cpp_lib_constexpr_utility should have the value 201811L in c++26" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # ifndef __cpp_lib_exchange_function diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp index dea2f29..ed0bb22 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp @@ -99,17 +99,11 @@ #elif TEST_STD_VER > 23 -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # if !defined(_LIBCPP_VERSION) diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 962688e..7bd8e89 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -6631,17 +6631,11 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++26" # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should be defined in c++26" -# endif -# if __cpp_lib_constrained_equality != 202411L -# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" -# endif -# else -# ifdef __cpp_lib_constrained_equality -# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" -# endif +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202411L +# error "__cpp_lib_constrained_equality should have the value 202411L in c++26" # endif # ifndef __cpp_lib_containers_ranges diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/eq.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/eq.pass.cpp index 779a89b..b0301f3 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/eq.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/eq.pass.cpp @@ -13,6 +13,8 @@ // template<class... TTypes, class... 
UTypes> // bool // operator==(const tuple<TTypes...>& t, const tuple<UTypes...>& u); +// template<tuple-like UTuple> +// friend constexpr bool operator==(const tuple& t, const UTuple& u); // since C++23 // UNSUPPORTED: c++03 @@ -23,6 +25,13 @@ #include "test_comparisons.h" #include "test_macros.h" +#if TEST_STD_VER >= 23 +# include <ranges> +#endif +#if TEST_STD_VER >= 26 +# include <complex> +#endif + #if TEST_STD_VER >= 26 // Test SFINAE. @@ -41,140 +50,249 @@ static_assert( static_assert( !std::equality_comparable_with<std::tuple<EqualityComparable, EqualityComparable>, std::tuple<EqualityComparable>>); +// Heterogeneous comparisons. +// TODO: Use equality_comparable_with once other changes of tuple introduced in P2165R4 are implemented. +template <class T, class U> +concept can_eq_compare = requires(const T& t, const U& u) { t == u; }; + +static_assert(can_eq_compare<std::tuple<EqualityComparable>, std::array<EqualityComparable, 1>>); +static_assert(!can_eq_compare<std::tuple<EqualityComparable>, std::array<NonComparable, 1>>); + +static_assert(can_eq_compare<std::tuple<EqualityComparable, EqualityComparable>, + std::pair<EqualityComparable, EqualityComparable>>); +static_assert( + !can_eq_compare<std::tuple<EqualityComparable, EqualityComparable>, std::pair<EqualityComparable, NonComparable>>); + +static_assert(can_eq_compare<std::tuple<int*, int*>, std::ranges::subrange<const int*>>); +static_assert(!can_eq_compare<std::tuple<int (*)[1], int (*)[1]>, std::ranges::subrange<const int*>>); +static_assert(can_eq_compare<std::tuple<double, double>, std::complex<float>>); +static_assert(!can_eq_compare<std::tuple<int*, int*>, std::complex<float>>); + +// Size mismatch in heterogeneous comparisons. 
+static_assert(!can_eq_compare<std::tuple<>, std::array<EqualityComparable, 2>>); +static_assert(!can_eq_compare<std::tuple<EqualityComparable>, std::array<EqualityComparable, 2>>); +static_assert(!can_eq_compare<std::tuple<>, std::pair<EqualityComparable, EqualityComparable>>); +static_assert(!can_eq_compare<std::tuple<EqualityComparable>, std::pair<EqualityComparable, EqualityComparable>>); +static_assert(!can_eq_compare<std::tuple<int*>, std::ranges::subrange<int*>>); +static_assert(!can_eq_compare<std::tuple<double>, std::complex<double>>); + #endif -int main(int, char**) -{ - { - typedef std::tuple<> T1; - typedef std::tuple<> T2; - const T1 t1; - const T2 t2; - assert(t1 == t2); - assert(!(t1 != t2)); - } - { - typedef std::tuple<int> T1; - typedef std::tuple<double> T2; - const T1 t1(1); - const T2 t2(1.1); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<int> T1; - typedef std::tuple<double> T2; - const T1 t1(1); - const T2 t2(1); - assert(t1 == t2); - assert(!(t1 != t2)); - } - { - typedef std::tuple<int, double> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1, 2); - assert(t1 == t2); - assert(!(t1 != t2)); - } - { - typedef std::tuple<int, double> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1, 3); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<int, double> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1.1, 2); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<int, double> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1.1, 3); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 2, 3); - assert(t1 == t2); - assert(!(t1 != t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 
t1(1, 2, 3); - const T2 t2(1.1, 2, 3); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 3, 3); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 2, 4); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 3, 2); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1.1, 2, 2); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1.1, 3, 3); - assert(!(t1 == t2)); - assert(t1 != t2); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1.1, 3, 2); - assert(!(t1 == t2)); - assert(t1 != t2); - } -#if TEST_STD_VER > 11 - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - constexpr T1 t1(1, 2, 3); - constexpr T2 t2(1.1, 3, 2); - static_assert(!(t1 == t2), ""); - static_assert(t1 != t2, ""); - } +TEST_CONSTEXPR_CXX14 bool test() { + { + typedef std::tuple<> T1; + typedef std::tuple<> T2; + const T1 t1; + const T2 t2; + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + typedef std::tuple<int> T1; + typedef std::tuple<double> T2; + const T1 t1(1); + const T2 t2(1.1); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<int> T1; + typedef std::tuple<double> T2; + const T1 t1(1); + const T2 t2(1); + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + typedef std::tuple<int, double> T1; + typedef std::tuple<double, 
long> T2; + const T1 t1(1, 2); + const T2 t2(1, 2); + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + typedef std::tuple<int, double> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1, 3); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<int, double> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1.1, 2); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<int, double> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1.1, 3); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 2, 3); + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1.1, 2, 3); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 3, 3); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 2, 4); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 3, 2); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1.1, 2, 2); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1.1, 3, 3); + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + typedef std::tuple<long, int, double> T1; + typedef 
std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1.1, 3, 2); + assert(!(t1 == t2)); + assert(t1 != t2); + } +#if TEST_STD_VER >= 14 + { + using T1 = std::tuple<long, int, double>; + using T2 = std::tuple<double, long, int>; + constexpr T1 t1(1, 2, 3); + constexpr T2 t2(1.1, 3, 2); + assert(!(t1 == t2)); + assert(t1 != t2); + } #endif +#if TEST_STD_VER >= 23 + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2}; + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3}; + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + + int arr[1]{}; + T1 t1{arr, arr + 1}; + T2 t2{arr}; + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + int arr[1]{}; + T1 t1{arr, arr}; + T2 t2{arr}; + assert(!(t1 == t2)); + assert(t1 != t2); + } + { + assert((std::tuple<>{} == std::array<int*, 0>{})); + assert((std::tuple<>{} == std::array<double, 0>{})); + } +#endif +#if TEST_STD_VER >= 26 + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + assert(t1 == t2); + assert(!(t1 != t2)); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + assert(!(t1 == t2)); + assert(t1 != t2); + } +#endif 
+ + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 14 + static_assert(test(), ""); +#endif return 0; } diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/lt.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/lt.pass.cpp index 0ece614..ef50454 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/lt.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/lt.pass.cpp @@ -28,186 +28,305 @@ // UNSUPPORTED: c++03 -#include <tuple> -#include <string> #include <cassert> +#include <tuple> #include "test_macros.h" -int main(int, char**) -{ - { - typedef std::tuple<> T1; - typedef std::tuple<> T2; - const T1 t1; - const T2 t2; - assert(!(t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long> T1; - typedef std::tuple<double> T2; - const T1 t1(1); - const T2 t2(1); - assert(!(t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long> T1; - typedef std::tuple<double> T2; - const T1 t1(1); - const T2 t2(0.9); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long> T1; - typedef std::tuple<double> T2; - const T1 t1(1); - const T2 t2(1.1); - assert( (t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } - { - typedef std::tuple<long, int> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1, 2); - assert(!(t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(0.9, 2); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1.1, 2); - assert( (t1 < t2)); - assert( (t1 <= t2)); 
- assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } - { - typedef std::tuple<long, int> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1, 1); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int> T1; - typedef std::tuple<double, long> T2; - const T1 t1(1, 2); - const T2 t2(1, 3); - assert( (t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 2, 3); - assert(!(t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(0.9, 2, 3); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1.1, 2, 3); - assert( (t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 1, 3); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 3, 3); - assert( (t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 3); - const T2 t2(1, 2, 2); - assert(!(t1 < t2)); - assert(!(t1 <= t2)); - assert( (t1 > t2)); - assert( (t1 >= t2)); - } - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - const T1 t1(1, 2, 
3); - const T2 t2(1, 2, 4); - assert( (t1 < t2)); - assert( (t1 <= t2)); - assert(!(t1 > t2)); - assert(!(t1 >= t2)); - } -#if TEST_STD_VER > 11 - { - typedef std::tuple<long, int, double> T1; - typedef std::tuple<double, long, int> T2; - constexpr T1 t1(1, 2, 3); - constexpr T2 t2(1, 2, 4); - static_assert( (t1 < t2), ""); - static_assert( (t1 <= t2), ""); - static_assert(!(t1 > t2), ""); - static_assert(!(t1 >= t2), ""); - } +#if TEST_STD_VER >= 23 +# include <array> +# include <ranges> +# include <utility> +#endif +#if TEST_STD_VER >= 26 +# include <complex> +#endif + +TEST_CONSTEXPR_CXX14 bool test() { + { + typedef std::tuple<> T1; + typedef std::tuple<> T2; + const T1 t1; + const T2 t2; + assert(!(t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long> T1; + typedef std::tuple<double> T2; + const T1 t1(1); + const T2 t2(1); + assert(!(t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long> T1; + typedef std::tuple<double> T2; + const T1 t1(1); + const T2 t2(0.9); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long> T1; + typedef std::tuple<double> T2; + const T1 t1(1); + const T2 t2(1.1); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + typedef std::tuple<long, int> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1, 2); + assert(!(t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(0.9, 2); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1.1, 2); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > 
t2)); + assert(!(t1 >= t2)); + } + { + typedef std::tuple<long, int> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1, 1); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int> T1; + typedef std::tuple<double, long> T2; + const T1 t1(1, 2); + const T2 t2(1, 3); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 2, 3); + assert(!(t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(0.9, 2, 3); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1.1, 2, 3); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 1, 3); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 3, 3); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 2, 2); + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert((t1 > t2)); + assert((t1 >= t2)); + } + { + typedef std::tuple<long, int, double> T1; + typedef std::tuple<double, long, int> T2; + const T1 t1(1, 2, 3); + const T2 t2(1, 2, 4); + 
assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } +#if TEST_STD_VER >= 14 + { + using T1 = std::tuple<long, int, double>; + using T2 = std::tuple<double, long, int>; + constexpr T1 t1(1, 2, 3); + constexpr T2 t2(1, 2, 4); + assert((t1 < t2)); + assert((t1 <= t2)); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } +#endif +#if TEST_STD_VER >= 23 + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2}; + assert(!(t1 < t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 3}; + assert(t1 < t2); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + assert(!(t1 < t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + assert(t1 < t2); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + + int arr[1]{}; + T1 t1{arr, arr + 1}; + T2 t2{arr}; + assert(!(t1 < t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + + int arr[1]{}; + T1 t1{arr + 1, arr + 1}; + T2 t2{arr}; + assert(!(t1 < t2)); + assert(!(t1 <= t2)); + assert(t1 > t2); + assert(t1 >= t2); + } + { + constexpr std::tuple<> t1{}; + constexpr std::array<int*, 0> t2{}; + assert(!(t1 < t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } + { + constexpr std::tuple<> t1{}; + constexpr std::array<double, 0> t2{}; + assert(!(t1 
< t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } +#endif +#if TEST_STD_VER >= 26 + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + assert(!(t1 < t2)); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(t1 >= t2); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + assert(t1 < t2); + assert(t1 <= t2); + assert(!(t1 > t2)); + assert(!(t1 >= t2)); + } #endif + return true; +} + +int main(int, char**) { + test(); +#if TEST_STD_VER >= 14 + static_assert(test(), ""); +#endif return 0; } diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/size_incompatible_three_way.compile.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/size_incompatible_three_way.compile.pass.cpp index f9c72a1..8eae8d6 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/size_incompatible_three_way.compile.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/size_incompatible_three_way.compile.pass.cpp @@ -13,16 +13,31 @@ // template<class... TTypes, class... 
UTypes> // auto // operator<=>(const tuple<TTypes...>& t, const tuple<UTypes...>& u); +// template<tuple-like UTuple> +// friend constexpr auto operator<=>(const tuple& t, const UTuple& u); // since C++23 // UNSUPPORTED: c++03, c++11, c++14, c++17 +#include <array> +#include <complex> +#include <ranges> #include <tuple> +#include <utility> template <class T, class U> concept can_compare = requires(T t, U u) { t <=> u; }; -typedef std::tuple<int> T1; -typedef std::tuple<int, long> T2; +using T1 = std::tuple<int>; +using T2 = std::tuple<int, long>; +using T1P = std::tuple<int*>; static_assert(!can_compare<T1, T2>); static_assert(!can_compare<T2, T1>); +static_assert(!can_compare<T1, std::array<int, 2>>); +static_assert(!can_compare<std::array<int, 2>, T1>); +static_assert(!can_compare<T1, std::pair<int, long>>); +static_assert(!can_compare<std::pair<int, long>, T1>); +static_assert(!can_compare<T1, std::complex<double>>); +static_assert(!can_compare<std::complex<double>, T1>); +static_assert(!can_compare<T1P, std::ranges::subrange<int*>>); +static_assert(!can_compare<std::ranges::subrange<int*>, T1P>); diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/three_way.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/three_way.pass.cpp index d9543148..697d0c0 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/three_way.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.rel/three_way.pass.cpp @@ -13,6 +13,8 @@ // template<class... TTypes, class... 
UTypes> // auto // operator<=>(const tuple<TTypes...>& t, const tuple<UTypes...>& u); +// template<tuple-like UTuple> +// friend constexpr auto operator<=>(const tuple& t, const UTuple& u); // since C++23 // UNSUPPORTED: c++03, c++11, c++14, c++17 @@ -22,11 +24,15 @@ TEST_CLANG_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_GCC_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_MSVC_DIAGNOSTIC_IGNORED(4242 4244) +#include <array> #include <cassert> #include <compare> +#include <complex> #include <limits> +#include <ranges> #include <tuple> #include <type_traits> // std::is_constant_evaluated +#include <utility> // A custom three-way result type struct CustomEquality { @@ -36,6 +42,11 @@ struct CustomEquality { }; constexpr bool test() { + struct WeakSpaceship { + constexpr bool operator==(const WeakSpaceship&) const { return true; } + constexpr std::weak_ordering operator<=>(const WeakSpaceship&) const { return std::weak_ordering::equivalent; } + }; + // Empty tuple { typedef std::tuple<> T0; @@ -135,23 +146,17 @@ constexpr bool test() { ASSERT_SAME_TYPE(decltype(T1() <=> T2()), std::strong_ordering); } { - struct WeakSpaceship { - constexpr bool operator==(const WeakSpaceship&) const { return true; } - constexpr std::weak_ordering operator<=>(const WeakSpaceship&) const { return std::weak_ordering::equivalent; } - }; - { - typedef std::tuple<int, unsigned int, WeakSpaceship> T1; - typedef std::tuple<int, unsigned long, WeakSpaceship> T2; - // Strongly ordered members and a weakly ordered member yields weak ordering. - ASSERT_SAME_TYPE(decltype(T1() <=> T2()), std::weak_ordering); - } - { - typedef std::tuple<unsigned int, int, WeakSpaceship> T1; - typedef std::tuple<double, long, WeakSpaceship> T2; - // Doubles are partially ordered, so one partial, one strong, and one weak ordering - // yields partial ordering. 
- ASSERT_SAME_TYPE(decltype(T1() <=> T2()), std::partial_ordering); - } + typedef std::tuple<int, unsigned int, WeakSpaceship> T1; + typedef std::tuple<int, unsigned long, WeakSpaceship> T2; + // Strongly ordered members and a weakly ordered member yields weak ordering. + ASSERT_SAME_TYPE(decltype(T1() <=> T2()), std::weak_ordering); + } + { + typedef std::tuple<unsigned int, int, WeakSpaceship> T1; + typedef std::tuple<double, long, WeakSpaceship> T2; + // Doubles are partially ordered, so one partial, one strong, and one weak ordering + // yields partial ordering. + ASSERT_SAME_TYPE(decltype(T1() <=> T2()), std::partial_ordering); } { struct NoSpaceship { @@ -224,6 +229,134 @@ constexpr bool test() { } } +// Heterogeneous comparisons enabled by P2165R4. +#if TEST_STD_VER >= 23 + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<int, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1, 2}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::equal); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<int, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1, 0}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::greater); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::less); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::equivalent); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::pair<double, long>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + 
assert((t1 <=> t2) == std::partial_ordering::less); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::equivalent); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::array<double, 2>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::less); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + + int arr[1]{}; + T1 t1{arr, arr + 1}; + T2 t2{arr}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::equal); + } + { + using T1 = std::tuple<const int*, const int*>; + using T2 = std::ranges::subrange<const int*>; + + int arr[1]{}; + T1 t1{arr + 1, arr + 1}; + T2 t2{arr}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::greater); + } + { + constexpr std::tuple<WeakSpaceship, WeakSpaceship> t1{}; + constexpr std::pair<WeakSpaceship, WeakSpaceship> t2{}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::weak_ordering); + assert((t1 <=> t2) == std::weak_ordering::equivalent); + } + { + constexpr std::tuple<WeakSpaceship, WeakSpaceship> t1{}; + constexpr std::array<WeakSpaceship, 2> t2{}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::weak_ordering); + assert((t1 <=> t2) == std::weak_ordering::equivalent); + } + { + constexpr std::tuple<> t1{}; + constexpr std::array<int*, 0> t2{}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::equal); + } + { + constexpr std::tuple<> t1{}; + constexpr std::array<double, 0> t2{}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::equal); + } + { + 
constexpr std::tuple<> t1{}; + constexpr std::array<WeakSpaceship, 0> t2{}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::strong_ordering); + assert((t1 <=> t2) == std::strong_ordering::equal); + } +#endif +#if TEST_STD_VER >= 26 + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.0, 2.0}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::equivalent); + } + { + using T1 = std::tuple<long, int>; + using T2 = std::complex<double>; + constexpr T1 t1{1, 2}; + constexpr T2 t2{1.1, 3.0}; + ASSERT_SAME_TYPE(decltype(t1 <=> t2), std::partial_ordering); + assert((t1 <=> t2) == std::partial_ordering::less); + } +#endif + return true; } diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index fe175fd7..d9317e0 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -442,11 +442,9 @@ feature_test_macros = [ { "name": "__cpp_lib_constrained_equality", "values": { - # "c++26": 202403, # P2944R3: Comparisons for reference_wrapper "c++26": 202411, # P3379R0: Constrain std::expected equality operators }, "headers": ["expected", "optional", "tuple", "utility", "variant"], - "unimplemented": True, }, { "name": "__cpp_lib_containers_ranges", diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index a145530..8802c8c 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -46,6 +46,8 @@ public: private: void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const; void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const; + bool tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const; }; } // end anonymous namespace @@ -1155,6 +1157,78 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, 
} } +// Try GOT indirection to PC relative optimization. +// From: +// * pcalau12i $a0, %got_pc_hi20(sym_got) +// * ld.w/d $a0, $a0, %got_pc_lo12(sym_got) +// To: +// * pcalau12i $a0, %pc_hi20(sym) +// * addi.w/d $a0, $a0, %pc_lo12(sym) +// +// Note: Although the optimization has been performed, the GOT entries still +// exist, similarly to AArch64. Eliminating the entries will increase code +// complexity. +bool LoongArch::tryGotToPCRel(uint8_t *loc, const Relocation &rHi20, + const Relocation &rLo12, uint64_t secAddr) const { + // Check if the relocations apply to consecutive instructions. + if (rHi20.offset + 4 != rLo12.offset) + return false; + + // Check if the relocations reference the same symbol and skip undefined, + // preemptible and STT_GNU_IFUNC symbols. + if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym->isDefined() || + rHi20.sym->isPreemptible || rHi20.sym->isGnuIFunc()) + return false; + + // GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI + // in position-independent code because these instructions produce a relative + // address. + if ((ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)) + return false; + + // Check if the addends of both relocations are zero. + if (rHi20.addend != 0 || rLo12.addend != 0) + return false; + + const uint32_t currInsn = read32le(loc); + const uint32_t nextInsn = read32le(loc + 4); + const uint32_t ldOpcode = ctx.arg.is64 ? LD_D : LD_W; + // Check if the first instruction is PCALAU12I and the second instruction is + // LD. + if ((currInsn & 0xfe000000) != PCALAU12I || + (nextInsn & 0xffc00000) != ldOpcode) + return false; + + // Check if both instructions use the same register. 
+ if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn)) + return false; + + Symbol &sym = *rHi20.sym; + uint64_t symLocal = sym.getVA(ctx); + const int64_t displace = symLocal - getLoongArchPage(secAddr + rHi20.offset); + // Check if the symbol address is in + // [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800). + const int64_t underflow = -0x80000000LL - 0x800; + const int64_t overflow = 0x80000000LL - 0x800; + if (!(displace >= underflow && displace < overflow)) + return false; + + Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset, + rHi20.addend, &sym}; + Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset, rLo12.addend, + &sym}; + uint64_t pageDelta = + getLoongArchPageDelta(symLocal, secAddr + rHi20.offset, rHi20.type); + // pcalau12i $a0, %pc_hi20 + write32le(loc, insn(PCALAU12I, getD5(currInsn), 0, 0)); + relocate(loc, newRHi20, pageDelta); + // addi.w/d $a0, $a0, %pc_lo12 + write32le(loc + 4, insn(ctx.arg.is64 ? ADDI_D : ADDI_W, getD5(nextInsn), + getJ5(nextInsn), 0)); + relocate(loc + 4, newRLo12, SignExtend64(symLocal, 64)); + return true; +} + // During TLSDESC GD_TO_IE, the converted code sequence always includes an // instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val // in `getRelocTargetVA`, expr of this instruction should be adjusted to @@ -1172,6 +1246,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const { return expr; } +static bool pairForGotRels(ArrayRef<Relocation> relocs) { + // Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in + // pairs. 
+ size_t i = 0; + const size_t size = relocs.size(); + for (; i != size; ++i) { + if (relocs[i].type == R_LARCH_GOT_PC_HI20) { + if (i + 1 < size && relocs[i + 1].type == R_LARCH_GOT_PC_LO12) { + ++i; + continue; + } + if (relaxable(relocs, i) && i + 2 < size && + relocs[i + 2].type == R_LARCH_GOT_PC_LO12) { + i += 2; + continue; + } + break; + } else if (relocs[i].type == R_LARCH_GOT_PC_LO12) { + break; + } + } + return i == size; +} + void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { const unsigned bits = ctx.arg.is64 ? 64 : 32; uint64_t secAddr = sec.getOutputSection()->addr; @@ -1181,6 +1279,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { secAddr += ehIn->getParent()->outSecOff; bool isExtreme = false, isRelax = false; const MutableArrayRef<Relocation> relocs = sec.relocs(); + const bool isPairForGotRels = pairForGotRels(relocs); for (size_t i = 0, size = relocs.size(); i != size; ++i) { Relocation &rel = relocs[i]; uint8_t *loc = buf + rel.offset; @@ -1264,6 +1363,24 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const { tlsdescToLe(loc, rel, val); } continue; + case RE_LOONGARCH_GOT_PAGE_PC: + // In LoongArch, we try GOT indirection to PC relative optimization in + // normal or medium code model, whether or not with R_LARCH_RELAX + // relocation. Moreover, if the original code sequence can be relaxed to a + // single instruction `pcaddi`, the first instruction will be removed and + // it will not reach here. + if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) { + bool isRelax = relaxable(relocs, i); + const Relocation lo12Rel = isRelax ? relocs[i + 2] : relocs[i + 1]; + if (lo12Rel.type == R_LARCH_GOT_PC_LO12 && + tryGotToPCRel(loc, rel, lo12Rel, secAddr)) { + // isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12 + // !isRelax: skip relocation R_LARCH_GOT_PC_LO12 + i += isRelax ? 
2 : 1; + continue; + } + } + break; default: break; } diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 37e4c8a..a5921fe 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/STLExtras.h" #include "llvm/LTO/LTO.h" +#include "llvm/Object/Archive.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Support/AArch64AttributeParser.h" #include "llvm/Support/ARMAttributeParser.h" @@ -1811,6 +1812,39 @@ static uint8_t getOsAbi(const Triple &t) { } } +// For DTLTO, bitcode member names must be valid paths to files on disk. +// For thin archives, resolve `memberPath` relative to the archive's location. +// Returns true if adjusted; false otherwise. Non-thin archives are unsupported. +static bool dtltoAdjustMemberPathIfThinArchive(Ctx &ctx, StringRef archivePath, + std::string &memberPath) { + assert(!archivePath.empty()); + + if (ctx.arg.dtltoDistributor.empty()) + return false; + + // Read the archive header to determine if it's a thin archive. 
+ auto bufferOrErr = + MemoryBuffer::getFileSlice(archivePath, sizeof(ThinArchiveMagic) - 1, 0); + if (std::error_code ec = bufferOrErr.getError()) { + ErrAlways(ctx) << "cannot open " << archivePath << ": " << ec.message(); + return false; + } + + if (!bufferOrErr->get()->getBuffer().starts_with(ThinArchiveMagic)) + return false; + + SmallString<128> resolvedPath; + if (path::is_relative(memberPath)) { + resolvedPath = path::parent_path(archivePath); + path::append(resolvedPath, memberPath); + } else + resolvedPath = memberPath; + + path::remove_dots(resolvedPath, /*remove_dot_dot=*/true); + memberPath = resolvedPath.str(); + return true; +} + BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive, bool lazy) : InputFile(ctx, BitcodeKind, mb) { @@ -1821,17 +1855,22 @@ BitcodeFile::BitcodeFile(Ctx &ctx, MemoryBufferRef mb, StringRef archiveName, if (ctx.arg.thinLTOIndexOnly) path = replaceThinLTOSuffix(ctx, mb.getBufferIdentifier()); - // ThinLTO assumes that all MemoryBufferRefs given to it have a unique - // name. If two archives define two members with the same name, this - // causes a collision which result in only one of the objects being taken - // into consideration at LTO time (which very likely causes undefined - // symbols later in the link stage). So we append file offset to make - // filename unique. StringSaver &ss = ctx.saver; - StringRef name = archiveName.empty() - ? ss.save(path) - : ss.save(archiveName + "(" + path::filename(path) + - " at " + utostr(offsetInArchive) + ")"); + StringRef name; + if (archiveName.empty() || + dtltoAdjustMemberPathIfThinArchive(ctx, archiveName, path)) { + name = ss.save(path); + } else { + // ThinLTO assumes that all MemoryBufferRefs given to it have a unique + // name. 
If two archives define two members with the same name, this + // causes a collision which result in only one of the objects being taken + // into consideration at LTO time (which very likely causes undefined + // symbols later in the link stage). So we append file offset to make + // filename unique. + name = ss.save(archiveName + "(" + path::filename(path) + " at " + + utostr(offsetInArchive) + ")"); + } + MemoryBufferRef mbref(mb.getBuffer(), name); obj = CHECK2(lto::InputFile::create(mbref), this); diff --git a/lld/test/ELF/dtlto/archive-thin.test b/lld/test/ELF/dtlto/archive-thin.test new file mode 100644 index 0000000..df3c2aa --- /dev/null +++ b/lld/test/ELF/dtlto/archive-thin.test @@ -0,0 +1,65 @@ +REQUIRES: x86 + +## Test that a DTLTO link assigns Module IDs to thin archive members as expected. + +RUN: rm -rf %t && split-file %s %t && cd %t + +RUN: sed 's/@t1/@t2/g' t1.ll > t2.ll +RUN: sed 's/@t1/@t3/g' t1.ll > t3.ll + +RUN: opt -thinlto-bc t1.ll -o t1.bc +RUN: opt -thinlto-bc t2.ll -o t2.bc +RUN: opt -thinlto-bc t3.ll -o t3.bc + +RUN: llvm-ar rcs t1.a t1.bc --thin +## Create this bitcode thin archive in a subdirectory to test the expansion of +## the path to a bitcode file that is referenced using "..", e.g., in this case +## "../t2.bc". +RUN: mkdir lib +RUN: llvm-ar rcs lib/t2.a t2.bc --thin +## Create this bitcode thin archive with an absolute path entry containing "..". +RUN: llvm-ar rcs t3.a %t/lib/../t3.bc --thin + +## Link from a different directory to ensure that thin archive member paths are +## resolved correctly relative to the archive locations. +RUN: mkdir %t/out && cd %t/out + +## Build a response file to share common linking arguments. +## Note: validate.py does not perform any compilation. Instead, it validates the +## received JSON, pretty-prints the JSON and the supplied arguments, and then +## exits with an error. This allows FileCheck directives to verify the +## distributor inputs. 
+RUN: echo "%t/t1.a %t/lib/t2.a ../t3.a \ +RUN: --thinlto-distributor=\"%python\" \ +RUN: --thinlto-distributor-arg=\"%llvm_src_root/utils/dtlto/validate.py\"" > rsp + +## Link thin archives using -u/--undefined. +RUN: not ld.lld @rsp -u t1 -u t2 -u t3 2>&1 | FileCheck %s + +## Link thin archives using --whole-archive. +RUN: not ld.lld --whole-archive @rsp 2>&1 | FileCheck %s + +## Check the module IDs in the JSON jobs description. +CHECK: "jobs": [ +CHECK: "inputs": [ +CHECK-NEXT: "{{([a-zA-Z]:)|/}} +CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t1.bc" + +CHECK: "inputs": [ +CHECK-NEXT: "{{([a-zA-Z]\:)|/}} +CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t2.bc" + +CHECK: "inputs": [ +CHECK-NEXT: "{{([a-zA-Z]:)|/}} +CHECK-SAME: {{/|\\\\}}archive-thin.test.tmp{{/|\\\\}}t3.bc" + +## Ensure backend compilation fails as expected (due to validate.py dummy behavior). +CHECK: error: DTLTO backend compilation: cannot open native object file: + +#--- t1.ll +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @t1() { + ret void +} diff --git a/lld/test/ELF/loongarch-pc-hi20-lo12-got.s b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s new file mode 100644 index 0000000..acd9400 --- /dev/null +++ b/lld/test/ELF/loongarch-pc-hi20-lo12-got.s @@ -0,0 +1,145 @@ +# REQUIRES: loongarch +# RUN: rm -rf %t && split-file %s %t && cd %t + +# RUN: llvm-mc --filetype=obj --triple=loongarch64 a.s -o a.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 unpaired.s -o unpaired.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64 lone-ldr.s -o lone-ldr.o + +# RUN: ld.lld a.o -T within-range.t -o a +# RUN: llvm-objdump -d --no-show-raw-insn a | FileCheck %s + +## This test verifies the encoding when the register $a0 is used. +# CHECK: pcalau12i $a0, 0 +# CHECK-NEXT: addi.d $a0, $a0, -2048 + +## PCALAU12I contains a nonzero addend, no relaxations should be applied. 
+# CHECK-NEXT: pcalau12i $a1, 2 +# CHECK-NEXT: ld.d $a1, $a1, -2048 + +## LD contains a nonzero addend, no relaxations should be applied. +# CHECK-NEXT: pcalau12i $a2, 2 +# CHECK-NEXT: ld.d $a2, $a2, -2040 + +## PCALAU12I and LD use different registers, no relaxations should be applied. +# CHECK-NEXT: pcalau12i $a3, 2 +# CHECK-NEXT: ld.d $a4, $a3, -2048 + +## PCALAU12I and LD use different registers, no relaxations should be applied. +# CHECK-NEXT: pcalau12i $a5, 2 +# CHECK-NEXT: ld.d $a5, $a6, -2048 + +# RUN: ld.lld a.o -T underflow-range.t -o a-underflow +# RUN: llvm-objdump -d --no-show-raw-insn a-underflow | FileCheck --check-prefix=OUTRANGE %s + +# RUN: ld.lld a.o -T overflow-range.t -o a-overflow +# RUN: llvm-objdump -d --no-show-raw-insn a-overflow | FileCheck --check-prefix=OUTRANGE %s + +# OUTRANGE: pcalau12i $a0, 1 +# OUTRANGE-NEXT: ld.d $a0, $a0, 0 + +## Relocations do not appear in pairs, no relaxations should be applied. +# RUN: ld.lld unpaired.o -T within-range.t -o unpaired +# RUN: llvm-objdump --no-show-raw-insn -d unpaired | FileCheck --check-prefix=UNPAIRED %s + +# UNPAIRED: pcalau12i $a0, 2 +# UNPAIRED-NEXT: b 8 +# UNPAIRED-NEXT: pcalau12i $a0, 2 +# UNPAIRED: ld.d $a0, $a0, -2048 + +## Relocations do not appear in pairs, no relaxations should be applied. +# RUN: ld.lld lone-ldr.o -T within-range.t -o lone-ldr +# RUN: llvm-objdump --no-show-raw-insn -d lone-ldr | FileCheck --check-prefix=LONE-LDR %s + +# LONE-LDR: ld.d $a0, $a0, -2048 + +## 32-bit code is mostly the same. We only test a few variants. +# RUN: llvm-mc --filetype=obj --triple=loongarch32 a.32.s -o a.32.o +# RUN: ld.lld a.32.o -T within-range.t -o a32 +# RUN: llvm-objdump -d --no-show-raw-insn a32 | FileCheck --check-prefix=CHECK32 %s + +## This test verifies the encoding when the register $a0 is used. 
+# CHECK32: pcalau12i $a0, 0 +# CHECK32-NEXT: addi.w $a0, $a0, -2048 + + +## This linker script ensures that .rodata and .text are sufficiently close to +## each other so that the pcalau12i + ld pair can be relaxed to pcalau12i + add. +#--- within-range.t +SECTIONS { + .rodata 0x1800: { *(.rodata) } + .text 0x2800: { *(.text) } + .got 0x3800: { *(.got) } +} + +## This linker script ensures that .rodata and .text are sufficiently far apart +## so that the pcalau12i + ld pair cannot be relaxed to pcalau12i + add. +#--- underflow-range.t +SECTIONS { + .rodata 0x800-4: { *(.rodata) } + .got 0x80002000: { *(.got) } + .text 0x80001000: { *(.text) } /* (0x800-4)+2GB+0x800+4 */ +} + +#--- overflow-range.t +SECTIONS { + .text 0x1000: { *(.text) } + .got 0x2000: { *(.got) } + .rodata 0x80000800 : { *(.rodata) } /* 0x1000+2GB-0x800 */ +} + +#--- a.s +## Symbol 'x' is nonpreemptible, the optimization should be applied. +.rodata +.hidden x +x: +.word 10 + +.text +.global _start +_start: + pcalau12i $a0, %got_pc_hi20(x) + ld.d $a0, $a0, %got_pc_lo12(x) + pcalau12i $a1, %got_pc_hi20(x+1) + ld.d $a1, $a1, %got_pc_lo12(x) + pcalau12i $a2, %got_pc_hi20(x) + ld.d $a2, $a2, %got_pc_lo12(x+8) + pcalau12i $a3, %got_pc_hi20(x) + ld.d $a4, $a3, %got_pc_lo12(x) + pcalau12i $a5, %got_pc_hi20(x) + ld.d $a5, $a6, %got_pc_lo12(x) + +#--- unpaired.s +.text +.hidden x +x: + nop +.global _start +_start: + pcalau12i $a0, %got_pc_hi20(x) + b L + pcalau12i $a0, %got_pc_hi20(x) +L: + ld.d $a0, $a0, %got_pc_lo12(x) + +#--- lone-ldr.s +.text +.hidden x +x: + nop +.global _start +_start: + ld.d $a0, $a0, %got_pc_lo12(x) + + +#--- a.32.s +## Symbol 'x' is nonpreemptible, the optimization should be applied. 
+.rodata +.hidden x +x: +.word 10 + +.text +.global _start +_start: + pcalau12i $a0, %got_pc_hi20(x) + ld.w $a0, $a0, %got_pc_lo12(x) diff --git a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s index a33f866..08d5d3e 100644 --- a/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s +++ b/lld/test/ELF/loongarch-relax-pc-hi20-lo12.s @@ -31,24 +31,26 @@ ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0 # NORELAX32-NEXT: 10000: pcalau12i $a0, 1024 # NORELAX32-NEXT: addi.w $a0, $a0, 0 +## Not a relaxation; conversion to PCRel. # NORELAX32-NEXT: pcalau12i $a0, 1024 -# NORELAX32-NEXT: ld.w $a0, $a0, 4 +# NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX32-NEXT: pcalau12i $a0, 1024 # NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX32-NEXT: pcalau12i $a0, 1024 -# NORELAX32-NEXT: ld.w $a0, $a0, 4 +# NORELAX32-NEXT: addi.w $a0, $a0, 0 # NORELAX64-LABEL: <_start>: ## offset exceed range of pcaddi ## offset = 0x410000 - 0x10000: 0x400 pages, page offset 0 # NORELAX64-NEXT: 10000: pcalau12i $a0, 1024 # NORELAX64-NEXT: addi.d $a0, $a0, 0 +## Not a relaxation; conversion to PCRel. # NORELAX64-NEXT: pcalau12i $a0, 1024 -# NORELAX64-NEXT: ld.d $a0, $a0, 8 +# NORELAX64-NEXT: addi.d $a0, $a0, 0 # NORELAX64-NEXT: pcalau12i $a0, 1024 # NORELAX64-NEXT: addi.d $a0, $a0, 0 # NORELAX64-NEXT: pcalau12i $a0, 1024 -# NORELAX64-NEXT: ld.d $a0, $a0, 8 +# NORELAX64-NEXT: addi.d $a0, $a0, 0 ## GOT references with non-zero addends. No relaxation. diff --git a/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i b/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i index d7c68baf..99fe91b 100644 --- a/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i +++ b/lldb/bindings/interface/SBMemoryRegionInfoDocstrings.i @@ -1,5 +1,10 @@ %feature("docstring", -"API clients can get information about memory regions in processes." +"API clients can get information about memory regions in processes. 
+ +For Python users, `len()` is overridden to output the size of the memory region in bytes. +For Python users, `str()` is overridden with the results of the GetDescription function- + produces a formatted string that describes a memory range in the form: + [Hex start - Hex End) with associated permissions (RWX)" ) lldb::SBMemoryRegionInfo; %feature("docstring", " @@ -29,3 +34,11 @@ Return the size of pages in this memory region. 0 will be returned if this information was unavailable." ) lldb::SBMemoryRegionInfo::GetPageSize(); + +%feature("docstring", " + Takes an SBStream parameter to write output to, + formatted [Hex start - Hex End) with associated permissions (RWX). + If the function returns false, no output will be written. + If it returns true, the output will be written to the stream. + " +) lldb::SBMemoryRegionInfo::GetDescription;
\ No newline at end of file diff --git a/lldb/include/lldb/API/SBMemoryRegionInfo.h b/lldb/include/lldb/API/SBMemoryRegionInfo.h index f9a5dc9..dc5aa08 100644 --- a/lldb/include/lldb/API/SBMemoryRegionInfo.h +++ b/lldb/include/lldb/API/SBMemoryRegionInfo.h @@ -115,6 +115,17 @@ public: bool operator!=(const lldb::SBMemoryRegionInfo &rhs) const; + /// writes a description of the memory region to a SBStream. + /// + /// \param[in,out] description + /// A stream object where the description will be written. + /// + /// \return + /// Returns true if the description was successfully written, + /// false otherwise. + /// + /// The description format is: [Hex start - Hex End) with associated + /// permissions (RWX) bool GetDescription(lldb::SBStream &description); private: diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index 8bb55c9..8513e14 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -86,7 +86,8 @@ struct ModuleFunctionSearchOptions { /// /// The module will parse more detailed information as more queries are made. class Module : public std::enable_shared_from_this<Module>, - public SymbolContextScope { + public SymbolContextScope, + public UserID { public: class LookupInfo; // Static functions that can track the lifetime of module objects. This is diff --git a/lldb/include/lldb/Core/ModuleList.h b/lldb/include/lldb/Core/ModuleList.h index d5e291f3..6ecdcf1 100644 --- a/lldb/include/lldb/Core/ModuleList.h +++ b/lldb/include/lldb/Core/ModuleList.h @@ -352,6 +352,14 @@ public: // UUID values is very efficient and accurate. lldb::ModuleSP FindModule(const UUID &uuid) const; + /// Find a module by LLDB-specific unique identifier. + /// + /// \param[in] uid The UID of the module assigned to it on construction. + /// + /// \returns ModuleSP of module with \c uid. Returns nullptr if no such + /// module could be found. 
+ lldb::ModuleSP FindModule(lldb::user_id_t uid) const; + /// Finds the first module whose file specification matches \a module_spec. lldb::ModuleSP FindFirstModule(const ModuleSpec &module_spec) const; diff --git a/lldb/include/lldb/Expression/Expression.h b/lldb/include/lldb/Expression/Expression.h index 8de9364..20067f4 100644 --- a/lldb/include/lldb/Expression/Expression.h +++ b/lldb/include/lldb/Expression/Expression.h @@ -13,6 +13,7 @@ #include <string> #include <vector> +#include "llvm/Support/FormatProviders.h" #include "lldb/Expression/ExpressionTypeSystemHelper.h" #include "lldb/lldb-forward.h" @@ -96,6 +97,62 @@ protected: ///invalid. }; +/// Holds parsed information about a function call label that +/// LLDB attaches as an AsmLabel to function AST nodes it parses +/// from debug-info. +/// +/// The format being: +/// +/// <prefix>:<module uid>:<symbol uid>:<name> +/// +/// The label string needs to stay valid for the entire lifetime +/// of this object. +struct FunctionCallLabel { + /// Unique identifier of the lldb_private::Module + /// which contains the symbol identified by \c symbol_id. + lldb::user_id_t module_id; + + /// Unique identifier of the function symbol on which to + /// perform the function call. For example, for DWARF this would + /// be the DIE UID. + lldb::user_id_t symbol_id; + + /// Name to use when searching for the function symbol in + /// \c module_id. For most function calls this will be a + /// mangled name. In cases where a mangled name can't be used, + /// this will be the function name. + /// + /// NOTE: kept as last element so we don't have to worry about + /// ':' in the mangled name when parsing the label. + llvm::StringRef lookup_name; + + /// Decodes the specified function \c label into a \c FunctionCallLabel. + static llvm::Expected<FunctionCallLabel> fromString(llvm::StringRef label); + + /// Encode this FunctionCallLabel into its string representation. 
+ /// + /// The representation roundtrips through \c fromString: + /// \code{.cpp} + /// llvm::StringRef encoded = "$__lldb_func:0x0:0x0:_Z3foov"; + /// FunctionCallLabel label = *fromString(encoded); + /// + /// assert (label.toString() == encoded); + /// assert (*fromString(label.toString()) == label); + /// \endcode + std::string toString() const; +}; + +/// LLDB attaches this prefix to mangled names of functions that get called +/// from JITted expressions. +inline constexpr llvm::StringRef FunctionCallLabelPrefix = "$__lldb_func"; + } // namespace lldb_private +namespace llvm { +template <> struct format_provider<lldb_private::FunctionCallLabel> { + static void format(const lldb_private::FunctionCallLabel &label, + raw_ostream &OS, StringRef Style); +}; +} // namespace llvm + #endif // LLDB_EXPRESSION_EXPRESSION_H diff --git a/lldb/include/lldb/Symbol/SymbolFile.h b/lldb/include/lldb/Symbol/SymbolFile.h index e95f955..bbc615d 100644 --- a/lldb/include/lldb/Symbol/SymbolFile.h +++ b/lldb/include/lldb/Symbol/SymbolFile.h @@ -18,6 +18,7 @@ #include "lldb/Symbol/CompilerType.h" #include "lldb/Symbol/Function.h" #include "lldb/Symbol/SourceModule.h" +#include "lldb/Symbol/SymbolContext.h" #include "lldb/Symbol/Type.h" #include "lldb/Symbol/TypeList.h" #include "lldb/Symbol/TypeSystem.h" @@ -328,6 +329,18 @@ public: GetMangledNamesForFunction(const std::string &scope_qualified_name, std::vector<ConstString> &mangled_names); + /// Resolves the function corresponding to the specified LLDB function + /// call \c label. + /// + /// \param[in] label The FunctionCallLabel to be resolved. + /// + /// \returns An llvm::Error if the specified \c label couldn't be resolved. + /// Returns the resolved function (as a SymbolContext) otherwise. 
+ virtual llvm::Expected<SymbolContext> + ResolveFunctionCallLabel(const FunctionCallLabel &label) { + return llvm::createStringError("Not implemented"); + } + virtual void GetTypes(lldb_private::SymbolContextScope *sc_scope, lldb::TypeClass type_mask, lldb_private::TypeList &type_list) = 0; diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index 90997da..f27a95d 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -130,8 +130,10 @@ Module *Module::GetAllocatedModuleAtIndex(size_t idx) { return nullptr; } +static std::atomic<lldb::user_id_t> g_unique_id = 1; + Module::Module(const ModuleSpec &module_spec) - : m_unwind_table(*this), m_file_has_changed(false), + : UserID(g_unique_id++), m_unwind_table(*this), m_file_has_changed(false), m_first_file_changed_log(false) { // Scope for locker below... { @@ -236,7 +238,8 @@ Module::Module(const ModuleSpec &module_spec) Module::Module(const FileSpec &file_spec, const ArchSpec &arch, ConstString object_name, lldb::offset_t object_offset, const llvm::sys::TimePoint<> &object_mod_time) - : m_mod_time(FileSystem::Instance().GetModificationTime(file_spec)), + : UserID(g_unique_id++), + m_mod_time(FileSystem::Instance().GetModificationTime(file_spec)), m_arch(arch), m_file(file_spec), m_object_name(object_name), m_object_offset(object_offset), m_object_mod_time(object_mod_time), m_unwind_table(*this), m_file_has_changed(false), @@ -257,7 +260,7 @@ Module::Module(const FileSpec &file_spec, const ArchSpec &arch, } Module::Module() - : m_unwind_table(*this), m_file_has_changed(false), + : UserID(g_unique_id++), m_unwind_table(*this), m_file_has_changed(false), m_first_file_changed_log(false) { std::lock_guard<std::recursive_mutex> guard( GetAllocationModuleCollectionMutex()); diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index d2e5be8..01f46b6 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -584,6 +584,20 @@ ModuleSP 
ModuleList::FindModule(const UUID &uuid) const { return module_sp; } +ModuleSP ModuleList::FindModule(lldb::user_id_t uid) const { + ModuleSP module_sp; + ForEach([&](const ModuleSP &m) { + if (m->GetID() == uid) { + module_sp = m; + return IterationAction::Stop; + } + + return IterationAction::Continue; + }); + + return module_sp; +} + void ModuleList::FindTypes(Module *search_first, const TypeQuery &query, TypeResults &results) const { std::lock_guard<std::recursive_mutex> guard(m_modules_mutex); diff --git a/lldb/source/Expression/Expression.cpp b/lldb/source/Expression/Expression.cpp index 93f585e..796851f 100644 --- a/lldb/source/Expression/Expression.cpp +++ b/lldb/source/Expression/Expression.cpp @@ -10,6 +10,11 @@ #include "lldb/Target/ExecutionContextScope.h" #include "lldb/Target/Target.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" + using namespace lldb_private; Expression::Expression(Target &target) @@ -26,3 +31,47 @@ Expression::Expression(ExecutionContextScope &exe_scope) m_jit_end_addr(LLDB_INVALID_ADDRESS) { assert(m_target_wp.lock()); } + +llvm::Expected<FunctionCallLabel> +lldb_private::FunctionCallLabel::fromString(llvm::StringRef label) { + llvm::SmallVector<llvm::StringRef, 4> components; + label.split(components, ":", /*MaxSplit=*/3); + + if (components.size() != 4) + return llvm::createStringError("malformed function call label."); + + if (components[0] != FunctionCallLabelPrefix) + return llvm::createStringError(llvm::formatv( + "expected function call label prefix '{0}' but found '{1}' instead.", + FunctionCallLabelPrefix, components[0])); + + llvm::StringRef module_label = components[1]; + llvm::StringRef die_label = components[2]; + + lldb::user_id_t module_id = 0; + if (!llvm::to_integer(module_label, module_id)) + return llvm::createStringError( + llvm::formatv("failed to parse module ID from '{0}'.", module_label)); + + lldb::user_id_t die_id; 
+ if (!llvm::to_integer(die_label, die_id)) + return llvm::createStringError( + llvm::formatv("failed to parse symbol ID from '{0}'.", die_label)); + + return FunctionCallLabel{/*.module_id=*/module_id, + /*.symbol_id=*/die_id, + /*.lookup_name=*/components[3]}; +} + +std::string lldb_private::FunctionCallLabel::toString() const { + return llvm::formatv("{0}:{1:x}:{2:x}:{3}", FunctionCallLabelPrefix, + module_id, symbol_id, lookup_name) + .str(); +} + +void llvm::format_provider<FunctionCallLabel>::format( + const FunctionCallLabel &label, raw_ostream &OS, StringRef Style) { + OS << llvm::formatv("FunctionCallLabel{ module_id: {0:x}, symbol_id: {1:x}, " + "lookup_name: {2} }", + label.module_id, label.symbol_id, label.lookup_name); +} diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index 6f812b9..5e40df2 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Support/Error.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" @@ -20,6 +21,7 @@ #include "lldb/Core/Disassembler.h" #include "lldb/Core/Module.h" #include "lldb/Core/Section.h" +#include "lldb/Expression/Expression.h" #include "lldb/Expression/IRExecutionUnit.h" #include "lldb/Expression/ObjectFileJIT.h" #include "lldb/Host/HostInfo.h" @@ -36,6 +38,7 @@ #include "lldb/Utility/LLDBAssert.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" +#include "lldb/lldb-defines.h" #include <optional> @@ -771,6 +774,40 @@ private: lldb::addr_t m_best_internal_load_address = LLDB_INVALID_ADDRESS; }; +/// Returns address of the function referred to by the special function call +/// label \c label. 
+static llvm::Expected<lldb::addr_t> +ResolveFunctionCallLabel(const FunctionCallLabel &label, + const lldb_private::SymbolContext &sc, + bool &symbol_was_missing_weak) { + symbol_was_missing_weak = false; + + if (!sc.target_sp) + return llvm::createStringError("target not available."); + + auto module_sp = sc.target_sp->GetImages().FindModule(label.module_id); + if (!module_sp) + return llvm::createStringError( + llvm::formatv("failed to find module by UID {0}", label.module_id)); + + auto *symbol_file = module_sp->GetSymbolFile(); + if (!symbol_file) + return llvm::createStringError( + llvm::formatv("no SymbolFile found on module {0:x}.", module_sp.get())); + + auto sc_or_err = symbol_file->ResolveFunctionCallLabel(label); + if (!sc_or_err) + return llvm::joinErrors( + llvm::createStringError("failed to resolve function by UID"), + sc_or_err.takeError()); + + SymbolContextList sc_list; + sc_list.Append(*sc_or_err); + + LoadAddressResolver resolver(*sc.target_sp, symbol_was_missing_weak); + return resolver.Resolve(sc_list).value_or(LLDB_INVALID_ADDRESS); +} + lldb::addr_t IRExecutionUnit::FindInSymbols(const std::vector<ConstString> &names, const lldb_private::SymbolContext &sc, @@ -906,6 +943,34 @@ lldb::addr_t IRExecutionUnit::FindInUserDefinedSymbols( lldb::addr_t IRExecutionUnit::FindSymbol(lldb_private::ConstString name, bool &missing_weak) { + if (name.GetStringRef().starts_with(FunctionCallLabelPrefix)) { + auto label_or_err = FunctionCallLabel::fromString(name); + if (!label_or_err) { + LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), label_or_err.takeError(), + "failed to create FunctionCallLabel from '{1}': {0}", + name.GetStringRef()); + return LLDB_INVALID_ADDRESS; + } + + if (auto addr_or_err = + ResolveFunctionCallLabel(*label_or_err, m_sym_ctx, missing_weak)) { + return *addr_or_err; + } else { + LLDB_LOG_ERROR(GetLog(LLDBLog::Expressions), addr_or_err.takeError(), + "Failed to resolve function call label '{1}': {0}", + name.GetStringRef()); + + // Fall 
back to lookup by name despite error in resolving the label. + // May happen in practice if the definition of a function lives in + // a different lldb_private::Module than its declaration. Meaning + // we couldn't pin-point it using the information encoded in the label. + name.SetString(label_or_err->lookup_name); + } + } + + // TODO: now with function call labels, do we still need to + // generate alternate manglings? + std::vector<ConstString> candidate_C_names; std::vector<ConstString> candidate_CPlusPlus_names; diff --git a/lldb/source/Expression/IRInterpreter.cpp b/lldb/source/Expression/IRInterpreter.cpp index fa74e88..9140483 100644 --- a/lldb/source/Expression/IRInterpreter.cpp +++ b/lldb/source/Expression/IRInterpreter.cpp @@ -259,7 +259,9 @@ public: break; case Value::FunctionVal: if (const Function *constant_func = dyn_cast<Function>(constant)) { - lldb_private::ConstString name(constant_func->getName()); + lldb_private::ConstString name( + llvm::GlobalValue::dropLLVMManglingEscape( + constant_func->getName())); bool missing_weak = false; lldb::addr_t addr = m_execution_unit.FindSymbol(name, missing_weak); if (addr == LLDB_INVALID_ADDRESS) diff --git a/lldb/source/Interpreter/OptionArgParser.cpp b/lldb/source/Interpreter/OptionArgParser.cpp index 616f6e3..170f65a 100644 --- a/lldb/source/Interpreter/OptionArgParser.cpp +++ b/lldb/source/Interpreter/OptionArgParser.cpp @@ -161,7 +161,7 @@ lldb::addr_t OptionArgParser::ToRawAddress(const ExecutionContext *exe_ctx, lldb::addr_t fail_value, Status *error_ptr) { std::optional<lldb::addr_t> maybe_addr = DoToAddress(exe_ctx, s, error_ptr); - return maybe_addr ? 
*maybe_addr : fail_value; + return maybe_addr.value_or(fail_value); } lldb::addr_t OptionArgParser::ToAddress(const ExecutionContext *exe_ctx, diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp index 29927e3..b557787 100644 --- a/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp +++ b/lldb/source/Plugins/Process/Utility/RegisterContextThreadMemory.cpp @@ -153,14 +153,14 @@ uint32_t RegisterContextThreadMemory::ConvertRegisterKindToRegisterNumber( UpdateRegisterContext(); if (m_reg_ctx_sp) return m_reg_ctx_sp->ConvertRegisterKindToRegisterNumber(kind, num); - return false; + return LLDB_INVALID_REGNUM; } uint32_t RegisterContextThreadMemory::NumSupportedHardwareBreakpoints() { UpdateRegisterContext(); if (m_reg_ctx_sp) return m_reg_ctx_sp->NumSupportedHardwareBreakpoints(); - return false; + return 0; } uint32_t RegisterContextThreadMemory::SetHardwareBreakpoint(lldb::addr_t addr, diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp index 1340425..42dc579 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp @@ -498,9 +498,7 @@ PythonInteger::CreateStructuredSignedInteger() const { // PythonBoolean -PythonBoolean::PythonBoolean(bool value) { - SetValue(value); -} +PythonBoolean::PythonBoolean(bool value) { SetValue(value); } bool PythonBoolean::Check(PyObject *py_obj) { return py_obj ? 
PyBool_Check(py_obj) : false; @@ -539,7 +537,7 @@ bool PythonList::Check(PyObject *py_obj) { uint32_t PythonList::GetSize() const { if (IsValid()) - return PyList_GET_SIZE(m_py_obj); + return PyList_Size(m_py_obj); return 0; } @@ -618,7 +616,7 @@ bool PythonTuple::Check(PyObject *py_obj) { uint32_t PythonTuple::GetSize() const { if (IsValid()) - return PyTuple_GET_SIZE(m_py_obj); + return PyTuple_Size(m_py_obj); return 0; } @@ -856,15 +854,15 @@ PythonObject PythonCallable::operator()() { return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, nullptr)); } -PythonObject PythonCallable:: -operator()(std::initializer_list<PyObject *> args) { +PythonObject +PythonCallable::operator()(std::initializer_list<PyObject *> args) { PythonTuple arg_tuple(args); return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, arg_tuple.get())); } -PythonObject PythonCallable:: -operator()(std::initializer_list<PythonObject> args) { +PythonObject +PythonCallable::operator()(std::initializer_list<PythonObject> args) { PythonTuple arg_tuple(args); return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, arg_tuple.get())); @@ -899,7 +897,7 @@ bool PythonFile::Check(PyObject *py_obj) { const char *PythonException::toCString() const { if (!m_repr_bytes) return "unknown exception"; - return PyBytes_AS_STRING(m_repr_bytes); + return PyBytes_AsString(m_repr_bytes); } PythonException::PythonException(const char *caller) { @@ -1424,8 +1422,7 @@ Error PythonScript::Init() { auto builtins = PythonModule::BuiltinsModule(); if (Error error = globals.SetItem("__builtins__", builtins)) return error; - PyObject *o = - PyRun_String(script, Py_file_input, globals.get(), globals.get()); + PyObject *o = RunString(script, Py_file_input, globals.get(), globals.get()); if (!o) return exception(); Take<PythonObject>(o); @@ -1469,11 +1466,49 @@ python::runStringMultiLine(const llvm::Twine &string, const PythonDictionary &locals) { if (!globals.IsValid() || !locals.IsValid()) 
return nullDeref(); - PyObject *result = PyRun_String(NullTerminated(string), Py_file_input, - globals.get(), locals.get()); + PyObject *result = RunString(NullTerminated(string), Py_file_input, + globals.get(), locals.get()); if (!result) return exception(); return Take<PythonObject>(result); } +namespace lldb_private { +namespace python { +PyObject *RunString(const char *str, int start, PyObject *globals, + PyObject *locals) { + const char *filename = "<string>"; + + // Compile the string into a code object. + PyObject *code = Py_CompileString(str, filename, start); + if (!code) + return nullptr; + + // Execute the code object. + PyObject *result = PyEval_EvalCode(code, globals, locals); + + // Clean up the code object. + Py_DECREF(code); + + return result; +} + +int RunSimpleString(const char *str) { + PyObject *main_module = PyImport_AddModule("__main__"); + if (!main_module) + return -1; + + PyObject *globals = PyModule_GetDict(main_module); + if (!globals) + return -1; + + PyObject *result = RunString(str, Py_file_input, globals, globals); + if (!result) + return -1; + + return 0; +} +} // namespace python +} // namespace lldb_private + #endif diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h index 88c1bb7..45bb499 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.h @@ -194,8 +194,8 @@ template <typename T, char F> struct PassthroughFormat { }; template <> struct PythonFormat<char *> : PassthroughFormat<char *, 's'> {}; -template <> struct PythonFormat<const char *> : - PassthroughFormat<const char *, 's'> {}; +template <> +struct PythonFormat<const char *> : PassthroughFormat<const char *, 's'> {}; template <> struct PythonFormat<char> : PassthroughFormat<char, 'b'> {}; template <> struct PythonFormat<unsigned char> : PassthroughFormat<unsigned char, 'B'> {}; @@ -250,13 
+250,6 @@ public: void Reset(); - void Dump() const { - if (m_py_obj) - _PyObject_Dump(m_py_obj); - else - puts("NULL"); - } - void Dump(Stream &strm) const; PyObject *get() const { return m_py_obj; } @@ -780,6 +773,10 @@ private: operator=(const StructuredPythonObject &) = delete; }; +PyObject *RunString(const char *str, int start, PyObject *globals, + PyObject *locals); +int RunSimpleString(const char *str); + } // namespace python } // namespace lldb_private diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp index ce77569..300518f2 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp @@ -420,13 +420,13 @@ ScriptInterpreterPythonImpl::ScriptInterpreterPythonImpl(Debugger &debugger) run_string.Printf("%s = dict()", m_dictionary_name.c_str()); Locker locker(this, Locker::AcquireLock, Locker::FreeAcquiredLock); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); run_string.Clear(); run_string.Printf( "run_one_line (%s, 'import copy, keyword, os, re, sys, uuid, lldb')", m_dictionary_name.c_str()); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); // Reloading modules requires a different syntax in Python 2 and Python 3. // This provides a consistent syntax no matter what version of Python. 
@@ -434,7 +434,7 @@ ScriptInterpreterPythonImpl::ScriptInterpreterPythonImpl(Debugger &debugger) run_string.Printf( "run_one_line (%s, 'from importlib import reload as reload_module')", m_dictionary_name.c_str()); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); // WARNING: temporary code that loads Cocoa formatters - this should be done // on a per-platform basis rather than loading the whole set and letting the @@ -444,20 +444,20 @@ ScriptInterpreterPythonImpl::ScriptInterpreterPythonImpl(Debugger &debugger) run_string.Printf( "run_one_line (%s, 'import lldb.formatters, lldb.formatters.cpp')", m_dictionary_name.c_str()); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); run_string.Clear(); run_string.Printf("run_one_line (%s, 'import lldb.embedded_interpreter; from " "lldb.embedded_interpreter import run_python_interpreter; " "from lldb.embedded_interpreter import run_one_line')", m_dictionary_name.c_str()); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); run_string.Clear(); run_string.Printf("run_one_line (%s, 'lldb.debugger_unique_id = %" PRIu64 "')", m_dictionary_name.c_str(), m_debugger.GetID()); - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); } ScriptInterpreterPythonImpl::~ScriptInterpreterPythonImpl() { @@ -572,8 +572,8 @@ void ScriptInterpreterPythonImpl::LeaveSession() { log->PutCString("ScriptInterpreterPythonImpl::LeaveSession()"); // Unset the LLDB global variables. 
- PyRun_SimpleString("lldb.debugger = None; lldb.target = None; lldb.process " - "= None; lldb.thread = None; lldb.frame = None"); + RunSimpleString("lldb.debugger = None; lldb.target = None; lldb.process " + "= None; lldb.thread = None; lldb.frame = None"); // checking that we have a valid thread state - since we use our own // threading and locking in some (rare) cases during cleanup Python may end @@ -674,7 +674,7 @@ bool ScriptInterpreterPythonImpl::EnterSession(uint16_t on_entry_flags, run_string.PutCString("')"); } - PyRun_SimpleString(run_string.GetData()); + RunSimpleString(run_string.GetData()); run_string.Clear(); PythonDictionary &sys_module_dict = GetSysModuleDictionary(); @@ -816,9 +816,9 @@ bool ScriptInterpreterPythonImpl::ExecuteOneLine( if (!command.empty()) { // We want to call run_one_line, passing in the dictionary and the command - // string. We cannot do this through PyRun_SimpleString here because the + // string. We cannot do this through RunSimpleString here because the // command string may contain escaped characters, and putting it inside - // another string to pass to PyRun_SimpleString messes up the escaping. So + // another string to pass to RunSimpleString messes up the escaping. So // we use the following more complicated method to pass the command string // directly down to Python. llvm::Expected<std::unique_ptr<ScriptInterpreterIORedirect>> @@ -3057,7 +3057,7 @@ void ScriptInterpreterPythonImpl::Initialize() { // Update the path python uses to search for modules to include the current // directory. - PyRun_SimpleString("import sys"); + RunSimpleString("import sys"); AddToSysPath(AddLocation::End, "."); // Don't denormalize paths when calling file_spec.GetPath(). 
On platforms @@ -3069,10 +3069,10 @@ void ScriptInterpreterPythonImpl::Initialize() { if (FileSpec file_spec = HostInfo::GetShlibDir()) AddToSysPath(AddLocation::Beginning, file_spec.GetPath(false)); - PyRun_SimpleString("sys.dont_write_bytecode = 1; import " - "lldb.embedded_interpreter; from " - "lldb.embedded_interpreter import run_python_interpreter; " - "from lldb.embedded_interpreter import run_one_line"); + RunSimpleString("sys.dont_write_bytecode = 1; import " + "lldb.embedded_interpreter; from " + "lldb.embedded_interpreter import run_python_interpreter; " + "from lldb.embedded_interpreter import run_one_line"); #if LLDB_USE_PYTHON_SET_INTERRUPT // Python will not just overwrite its internal SIGINT handler but also the @@ -3084,13 +3084,13 @@ void ScriptInterpreterPythonImpl::Initialize() { // normal Python REPL signal handler which raises a KeyboardInterrupt. // Also make sure to not pollute the user's REPL with the signal module nor // our utility function. - PyRun_SimpleString("def lldb_setup_sigint_handler():\n" - " import signal;\n" - " def signal_handler(sig, frame):\n" - " raise KeyboardInterrupt()\n" - " signal.signal(signal.SIGINT, signal_handler);\n" - "lldb_setup_sigint_handler();\n" - "del lldb_setup_sigint_handler\n"); + RunSimpleString("def lldb_setup_sigint_handler():\n" + " import signal;\n" + " def signal_handler(sig, frame):\n" + " raise KeyboardInterrupt()\n" + " signal.signal(signal.SIGINT, signal_handler);\n" + "lldb_setup_sigint_handler();\n" + "del lldb_setup_sigint_handler\n"); #endif } @@ -3106,7 +3106,7 @@ void ScriptInterpreterPythonImpl::AddToSysPath(AddLocation location, statement.append(path); statement.append("\")"); } - PyRun_SimpleString(statement.c_str()); + RunSimpleString(statement.c_str()); } // We are intentionally NOT calling Py_Finalize here (this would be the logical diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h 
b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h index 4698b82a..83b64b8 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPythonImpl.h @@ -475,7 +475,7 @@ public: StreamString run_string; run_string.Printf("run_python_interpreter (%s)", m_python->GetDictionaryName()); - PyRun_SimpleString(run_string.GetData()); + python::RunSimpleString(run_string.GetData()); } } SetIsDone(true); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index e58e28a..781c1c6c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -24,6 +24,7 @@ #include "Plugins/Language/ObjC/ObjCLanguage.h" #include "lldb/Core/Module.h" #include "lldb/Core/Value.h" +#include "lldb/Expression/Expression.h" #include "lldb/Host/Host.h" #include "lldb/Symbol/CompileUnit.h" #include "lldb/Symbol/Function.h" @@ -254,7 +255,40 @@ static std::string MakeLLDBFuncAsmLabel(const DWARFDIE &die) { if (!name) return {}; - return name; + SymbolFileDWARF *dwarf = die.GetDWARF(); + if (!dwarf) + return {}; + + auto get_module_id = [&](SymbolFile *sym) { + if (!sym) + return LLDB_INVALID_UID; + + auto *obj = sym->GetMainObjectFile(); + if (!obj) + return LLDB_INVALID_UID; + + auto module_sp = obj->GetModule(); + if (!module_sp) + return LLDB_INVALID_UID; + + return module_sp->GetID(); + }; + + lldb::user_id_t module_id = get_module_id(dwarf->GetDebugMapSymfile()); + if (module_id == LLDB_INVALID_UID) + module_id = get_module_id(dwarf); + + if (module_id == LLDB_INVALID_UID) + return {}; + + const auto die_id = die.GetID(); + if (die_id == LLDB_INVALID_UID) + return {}; + + return FunctionCallLabel{/*module_id=*/module_id, + /*symbol_id=*/die_id, + /*.lookup_name=*/name} + .toString(); } TypeSP 
DWARFASTParserClang::ParseTypeFromClangModule(const SymbolContext &sc, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 2c3f050..a3ba061 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -2476,6 +2476,55 @@ bool SymbolFileDWARF::ResolveFunction(const DWARFDIE &orig_die, return false; } +llvm::Expected<SymbolContext> +SymbolFileDWARF::ResolveFunctionCallLabel(const FunctionCallLabel &label) { + std::lock_guard<std::recursive_mutex> guard(GetModuleMutex()); + + DWARFDIE die = GetDIE(label.symbol_id); + if (!die.IsValid()) + return llvm::createStringError( + llvm::formatv("invalid DIE ID in {0}", label)); + + // Label was created using a declaration DIE. Need to fetch the definition + // to resolve the function call. + if (die.GetAttributeValueAsUnsigned(llvm::dwarf::DW_AT_declaration, 0)) { + Module::LookupInfo info(ConstString(label.lookup_name), + lldb::eFunctionNameTypeFull, + lldb::eLanguageTypeUnknown); + + m_index->GetFunctions(info, *this, {}, [&](DWARFDIE entry) { + if (entry.GetAttributeValueAsUnsigned(llvm::dwarf::DW_AT_declaration, 0)) + return IterationAction::Continue; + + // We don't check whether the specification DIE for this function + // corresponds to the declaration DIE because the declaration might be in + // a type-unit but the definition in the compile-unit (and it's + // specifcation would point to the declaration in the compile-unit). We + // rely on the mangled name within the module to be enough to find us the + // unique definition. 
+ die = entry; + return IterationAction::Stop; + }); + + if (die.GetAttributeValueAsUnsigned(llvm::dwarf::DW_AT_declaration, 0)) + return llvm::createStringError( + llvm::formatv("failed to find definition DIE for {0}", label)); + } + + SymbolContextList sc_list; + if (!ResolveFunction(die, /*include_inlines=*/false, sc_list)) + return llvm::createStringError( + llvm::formatv("failed to resolve function for {0}", label)); + + if (sc_list.IsEmpty()) + return llvm::createStringError( + llvm::formatv("failed to find function for {0}", label)); + + assert(sc_list.GetSize() == 1); + + return sc_list[0]; +} + bool SymbolFileDWARF::DIEInDeclContext(const CompilerDeclContext &decl_ctx, const DWARFDIE &die, bool only_root_namespaces) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index 56d8ccb..3ec538d 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -436,6 +436,9 @@ protected: DIEArray MergeBlockAbstractParameters(const DWARFDIE &block_die, DIEArray &&variable_dies); + llvm::Expected<SymbolContext> + ResolveFunctionCallLabel(const FunctionCallLabel &label) override; + // Given a die_offset, figure out the symbol context representing that die. 
bool ResolveFunction(const DWARFDIE &die, bool include_inlines, SymbolContextList &sc_list); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index dd94f0b..9d7452a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -1602,3 +1602,14 @@ void SymbolFileDWARFDebugMap::GetCompileOptions( return IterationAction::Continue; }); } + +llvm::Expected<SymbolContext> SymbolFileDWARFDebugMap::ResolveFunctionCallLabel( + const FunctionCallLabel &label) { + const uint64_t oso_idx = GetOSOIndexFromUserID(label.symbol_id); + SymbolFileDWARF *oso_dwarf = GetSymbolFileByOSOIndex(oso_idx); + if (!oso_dwarf) + return llvm::createStringError(llvm::formatv( + "couldn't find symbol file for {0} in debug-map.", label)); + + return oso_dwarf->ResolveFunctionCallLabel(label); +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index f074b17..e1f1df23 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -144,6 +144,9 @@ public: void GetCompileOptions(std::unordered_map<lldb::CompUnitSP, Args> &args) override; + llvm::Expected<SymbolContext> + ResolveFunctionCallLabel(const FunctionCallLabel &label) override; + protected: enum { kHaveInitializedOSOs = (1 << 0), kNumFlags }; diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index fbd7f3e..9301f92 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -60,6 +60,7 @@ #include "lldb/Core/Module.h" #include "lldb/Core/PluginManager.h" #include "lldb/Core/UniqueCStringMap.h" +#include "lldb/Expression/Expression.h" 
#include "lldb/Host/StreamFile.h" #include "lldb/Symbol/ObjectFile.h" #include "lldb/Symbol/SymbolFile.h" @@ -9067,6 +9068,21 @@ ConstString TypeSystemClang::DeclGetName(void *opaque_decl) { return ConstString(); } +static ConstString +ExtractMangledNameFromFunctionCallLabel(llvm::StringRef label) { + auto label_or_err = FunctionCallLabel::fromString(label); + if (!label_or_err) { + llvm::consumeError(label_or_err.takeError()); + return {}; + } + + llvm::StringRef mangled = label_or_err->lookup_name; + if (Mangled::IsMangledName(mangled)) + return ConstString(mangled); + + return {}; +} + ConstString TypeSystemClang::DeclGetMangledName(void *opaque_decl) { clang::NamedDecl *nd = llvm::dyn_cast_or_null<clang::NamedDecl>( static_cast<clang::Decl *>(opaque_decl)); @@ -9078,6 +9094,13 @@ ConstString TypeSystemClang::DeclGetMangledName(void *opaque_decl) { if (!mc || !mc->shouldMangleCXXName(nd)) return {}; + // We have an LLDB FunctionCallLabel instead of an ordinary mangled name. + // Extract the mangled name out of this label. + if (const auto *label = nd->getAttr<AsmLabelAttr>()) + if (ConstString mangled = + ExtractMangledNameFromFunctionCallLabel(label->getLabel())) + return mangled; + llvm::SmallVector<char, 1024> buf; llvm::raw_svector_ostream llvm_ostrm(buf); if (llvm::isa<clang::CXXConstructorDecl>(nd)) { diff --git a/lldb/test/API/functionalities/memory/cache/main.cpp b/lldb/test/API/functionalities/memory/cache/main.cpp index 44d85c5e..5d2683f 100644 --- a/lldb/test/API/functionalities/memory/cache/main.cpp +++ b/lldb/test/API/functionalities/memory/cache/main.cpp @@ -1,5 +1,9 @@ -int main () -{ - int my_ints[] = {0x42}; - return 0; // Set break point at this line. +int test() { + int my_ints[] = {0x42}; + return 0; // Set break point at this line. 
+} + +int main() { + int dummy[100]; + return test(); } diff --git a/lldb/test/API/functionalities/tail_call_frames/cross_dso/TestCrossDSOTailCalls.py b/lldb/test/API/functionalities/tail_call_frames/cross_dso/TestCrossDSOTailCalls.py index 7c3d09b..0ca2f9e5 100644 --- a/lldb/test/API/functionalities/tail_call_frames/cross_dso/TestCrossDSOTailCalls.py +++ b/lldb/test/API/functionalities/tail_call_frames/cross_dso/TestCrossDSOTailCalls.py @@ -11,7 +11,6 @@ class TestCrossDSOTailCalls(TestBase): @skipIf(compiler="clang", compiler_version=["<", "10.0"]) @skipIf(dwarf_version=["<", "4"]) @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr26265") - @expectedFailureAll(archs=["arm$", "arm64", "aarch64"], bugnumber="llvm.org/PR44561") def test_cross_dso_tail_calls(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/functionalities/tail_call_frames/cross_object/TestCrossObjectTailCalls.py b/lldb/test/API/functionalities/tail_call_frames/cross_object/TestCrossObjectTailCalls.py index 180f4d3..b5de75e 100644 --- a/lldb/test/API/functionalities/tail_call_frames/cross_object/TestCrossObjectTailCalls.py +++ b/lldb/test/API/functionalities/tail_call_frames/cross_object/TestCrossObjectTailCalls.py @@ -11,7 +11,6 @@ class TestCrossObjectTailCalls(TestBase): @skipIf(compiler="clang", compiler_version=["<", "10.0"]) @skipIf(dwarf_version=["<", "4"]) @expectedFailureAll(oslist=["windows"], bugnumber="llvm.org/pr26265") - @expectedFailureAll(archs=["arm$", "arm64", "aarch64"], bugnumber="llvm.org/PR44561") def test_cross_object_tail_calls(self): self.build() exe = self.getBuildArtifact("a.out") diff --git a/lldb/test/API/lang/cpp/expr-definition-in-dylib/Makefile b/lldb/test/API/lang/cpp/expr-definition-in-dylib/Makefile new file mode 100644 index 0000000..82daeb1 --- /dev/null +++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/Makefile @@ -0,0 +1,6 @@ +CXX_SOURCES := main.cpp + +DYLIB_CXX_SOURCES := lib.cpp +DYLIB_NAME := lib + +include 
Makefile.rules diff --git a/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py b/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py new file mode 100644 index 0000000..02c34b3 --- /dev/null +++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/TestExprDefinitionInDylib.py @@ -0,0 +1,33 @@ +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class ExprDefinitionInDylibTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipIfWindows + def test(self): + """ + Tests that we can call functions whose definition + is in a different LLDB module than it's declaration. + """ + self.build() + + target = self.dbg.CreateTarget(self.getBuildArtifact("a.out")) + self.assertTrue(target, VALID_TARGET) + + env = self.registerSharedLibrariesWithTarget(target, ["lib"]) + + breakpoint = lldbutil.run_break_set_by_file_and_line( + self, "main.cpp", line_number("main.cpp", "return") + ) + + process = target.LaunchSimple(None, env, self.get_process_working_directory()) + + self.assertIsNotNone( + lldbutil.get_one_thread_stopped_at_breakpoint_id(self.process(), breakpoint) + ) + + self.expect_expr("f.method()", result_value="-72", result_type="int") diff --git a/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.cpp b/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.cpp new file mode 100644 index 0000000..ad148ce --- /dev/null +++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.cpp @@ -0,0 +1,3 @@ +#include "lib.h" + +int Foo::method() { return -72; } diff --git a/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.h b/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.h new file mode 100644 index 0000000..9568db2 --- /dev/null +++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/lib.h @@ -0,0 +1,8 @@ +#ifndef LIB_H_IN +#define LIB_H_IN + +struct Foo { + int method(); +}; + +#endif // LIB_H_IN diff --git 
a/lldb/test/API/lang/cpp/expr-definition-in-dylib/main.cpp b/lldb/test/API/lang/cpp/expr-definition-in-dylib/main.cpp new file mode 100644 index 0000000..2fddb2b --- /dev/null +++ b/lldb/test/API/lang/cpp/expr-definition-in-dylib/main.cpp @@ -0,0 +1,6 @@ +#include "lib.h" + +int main() { + Foo f; + return f.method(); +} diff --git a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py index db43dba..1143cd9 100644 --- a/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py +++ b/lldb/test/API/tools/lldb-dap/coreFile/TestDAP_coreFile.py @@ -26,6 +26,7 @@ class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase): "column": 0, "id": 524288, "line": 4, + "moduleId": "01DF54A6-045E-657D-3F8F-FB9CE1118789-14F8BD6D", "name": "bar", "source": {"name": "main.c", "path": "/home/labath/test/main.c"}, "instructionPointerReference": "0x40011C", @@ -34,6 +35,7 @@ class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase): "column": 0, "id": 524289, "line": 10, + "moduleId": "01DF54A6-045E-657D-3F8F-FB9CE1118789-14F8BD6D", "name": "foo", "source": {"name": "main.c", "path": "/home/labath/test/main.c"}, "instructionPointerReference": "0x400142", @@ -42,6 +44,7 @@ class TestDAP_coreFile(lldbdap_testcase.DAPTestCaseBase): "column": 0, "id": 524290, "line": 16, + "moduleId": "01DF54A6-045E-657D-3F8F-FB9CE1118789-14F8BD6D", "name": "_start", "source": {"name": "main.c", "path": "/home/labath/test/main.c"}, "instructionPointerReference": "0x40015F", diff --git a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py index abd4692..fd2037b 100644 --- a/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py +++ b/lldb/test/API/tools/lldb-dap/stackTrace/TestDAP_stackTrace.py @@ -242,3 +242,36 @@ class TestDAP_stackTrace(lldbdap_testcase.DAPTestCaseBase): frame = self.get_stackFrames(format={"parameters": False, "module": True})[0] 
self.assertEqual(frame["name"], "a.out recurse") + + @skipIfWindows + def test_stack_frame_module_id(self): + program = self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.c" + lines = [line_number(source, "recurse end")] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.assertEqual( + len(breakpoint_ids), len(lines), "expect correct number of breakpoints" + ) + + self.continue_to_breakpoints(breakpoint_ids) + + modules = self.dap_server.get_modules() + name_to_id = { + name: info["id"] for name, info in modules.items() if "id" in info + } + + stack_frames = self.get_stackFrames() + for frame in stack_frames: + module_id = frame.get("moduleId") + source_name = frame.get("source", {}).get("name") + if module_id is None or source_name is None: + continue + + if source_name in name_to_id: + expected_id = name_to_id[source_name] + self.assertEqual( + module_id, + expected_id, + f"Expected moduleId '{expected_id}' for {source_name}, got: {module_id}", + ) diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index f42c502..4f26599 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -550,6 +550,13 @@ llvm::json::Value CreateStackFrame(DAP &dap, lldb::SBFrame &frame, if (frame.IsArtificial() || frame.IsHidden()) object.try_emplace("presentationHint", "subtle"); + lldb::SBModule module = frame.GetModule(); + if (module.IsValid()) { + std::string uuid = module.GetUUIDString(); + if (!uuid.empty()) + object.try_emplace("moduleId", uuid); + } + return llvm::json::Value(std::move(object)); } diff --git a/lldb/unittests/Expression/CMakeLists.txt b/lldb/unittests/Expression/CMakeLists.txt index 533cdc6..4c58b3c 100644 --- a/lldb/unittests/Expression/CMakeLists.txt +++ b/lldb/unittests/Expression/CMakeLists.txt @@ -4,6 +4,7 @@ add_lldb_unittest(ExpressionTests DiagnosticManagerTest.cpp DWARFExpressionTest.cpp CppModuleConfigurationTest.cpp + ExpressionTest.cpp 
LINK_LIBS lldbCore diff --git a/lldb/unittests/Expression/ExpressionTest.cpp b/lldb/unittests/Expression/ExpressionTest.cpp new file mode 100644 index 0000000..12f6dd5 --- /dev/null +++ b/lldb/unittests/Expression/ExpressionTest.cpp @@ -0,0 +1,122 @@ +//===-- ExpressionTest.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +#include "TestingSupport/TestUtilities.h" +#include "lldb/Expression/Expression.h" +#include "llvm/Testing/Support/Error.h" + +using namespace lldb_private; + +struct LabelTestCase { + llvm::StringRef encoded; + FunctionCallLabel label; + llvm::SmallVector<llvm::StringRef> error_pattern; +}; + +static LabelTestCase g_label_test_cases[] = { + // Failure modes + {"bar:0x0:0x0:_Z3foov", + {}, + {"expected function call label prefix '$__lldb_func' but found 'bar' " + "instead."}}, + {"$__lldb_func :0x0:0x0:_Z3foov", + {}, + {"expected function call label prefix '$__lldb_func' but found " + "'$__lldb_func ' instead."}}, + {"$__lldb_funcc:0x0:0x0:_Z3foov", + {}, + {"expected function call label prefix '$__lldb_func' but found " + "'$__lldb_funcc' instead."}}, + {"", {}, {"malformed function call label."}}, + {"foo", {}, {"malformed function call label."}}, + {"$__lldb_func", {}, {"malformed function call label."}}, + {"$__lldb_func:", {}, {"malformed function call label."}}, + {"$__lldb_func:0x0:0x0", {}, {"malformed function call label."}}, + {"$__lldb_func:abc:0x0:_Z3foov", + {}, + {"failed to parse module ID from 'abc'."}}, + {"$__lldb_func:-1:0x0:_Z3foov", + {}, + {"failed to parse module ID from '-1'."}}, + {"$__lldb_func:0x0invalid:0x0:_Z3foov", + {}, + {"failed to parse module ID from 
'0x0invalid'."}}, + {"$__lldb_func:0x0 :0x0:_Z3foov", + {}, + {"failed to parse module ID from '0x0 '."}}, + {"$__lldb_func:0x0:abc:_Z3foov", + {}, + {"failed to parse symbol ID from 'abc'."}}, + {"$__lldb_func:0x5:-1:_Z3foov", + {}, + {"failed to parse symbol ID from '-1'."}}, + {"$__lldb_func:0x5:0x0invalid:_Z3foov", + {}, + {"failed to parse symbol ID from '0x0invalid'."}}, + {"$__lldb_func:0x5:0x0 :_Z3foov", + {}, + {"failed to parse symbol ID from '0x0 '."}}, + {"$__lldb_func:0x0:0x0:_Z3foov", + { + /*.module_id=*/0x0, + /*.symbol_id=*/0x0, + /*.lookup_name=*/"_Z3foov", + }, + {}}, + {"$__lldb_func:0x0:0x0:abc:def:::a", + { + /*.module_id=*/0x0, + /*.symbol_id=*/0x0, + /*.lookup_name=*/"abc:def:::a", + }, + {}}, + {"$__lldb_func:0xd2:0xf0:$__lldb_func", + { + /*.module_id=*/0xd2, + /*.symbol_id=*/0xf0, + /*.lookup_name=*/"$__lldb_func", + }, + {}}, +}; + +struct ExpressionTestFixture : public testing::TestWithParam<LabelTestCase> {}; + +TEST_P(ExpressionTestFixture, FunctionCallLabel) { + const auto &[encoded, label, errors] = GetParam(); + + auto decoded_or_err = FunctionCallLabel::fromString(encoded); + if (!errors.empty()) { + EXPECT_THAT_EXPECTED( + decoded_or_err, + llvm::FailedWithMessageArray(testing::ElementsAreArray(errors))); + return; + } + + EXPECT_THAT_EXPECTED(decoded_or_err, llvm::Succeeded()); + + auto label_str = label.toString(); + EXPECT_EQ(decoded_or_err->toString(), encoded); + EXPECT_EQ(label_str, encoded); + + EXPECT_EQ(decoded_or_err->module_id, label.module_id); + EXPECT_EQ(decoded_or_err->symbol_id, label.symbol_id); + EXPECT_EQ(decoded_or_err->lookup_name, label.lookup_name); + + auto roundtrip_or_err = FunctionCallLabel::fromString(label_str); + EXPECT_THAT_EXPECTED(roundtrip_or_err, llvm::Succeeded()); + + EXPECT_EQ(roundtrip_or_err->module_id, label.module_id); + EXPECT_EQ(roundtrip_or_err->symbol_id, label.symbol_id); + EXPECT_EQ(roundtrip_or_err->lookup_name, label.lookup_name); +} + 
+INSTANTIATE_TEST_SUITE_P(FunctionCallLabelTest, ExpressionTestFixture, + testing::ValuesIn(g_label_test_cases)); diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp index 2dd92fc..0d4b04b 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonDataObjectsTests.cpp @@ -632,8 +632,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { ASSERT_FALSE(error); { - PyObject *o = PyRun_String("lambda x : x", Py_eval_input, globals.get(), - globals.get()); + PyObject *o = + RunString("lambda x : x", Py_eval_input, globals.get(), globals.get()); ASSERT_FALSE(o == NULL); auto lambda = Take<PythonCallable>(o); auto arginfo = lambda.GetArgInfo(); @@ -642,8 +642,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { } { - PyObject *o = PyRun_String("lambda x,y=0: x", Py_eval_input, globals.get(), - globals.get()); + PyObject *o = RunString("lambda x,y=0: x", Py_eval_input, globals.get(), + globals.get()); ASSERT_FALSE(o == NULL); auto lambda = Take<PythonCallable>(o); auto arginfo = lambda.GetArgInfo(); @@ -652,8 +652,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { } { - PyObject *o = PyRun_String("lambda x,y=0, **kw: x", Py_eval_input, - globals.get(), globals.get()); + PyObject *o = RunString("lambda x,y=0, **kw: x", Py_eval_input, + globals.get(), globals.get()); ASSERT_FALSE(o == NULL); auto lambda = Take<PythonCallable>(o); auto arginfo = lambda.GetArgInfo(); @@ -662,8 +662,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { } { - PyObject *o = PyRun_String("lambda x,y,*a: x", Py_eval_input, globals.get(), - globals.get()); + PyObject *o = RunString("lambda x,y,*a: x", Py_eval_input, globals.get(), + globals.get()); ASSERT_FALSE(o == NULL); auto lambda = Take<PythonCallable>(o); auto arginfo = lambda.GetArgInfo(); @@ -673,8 +673,8 @@ TEST_F(PythonDataObjectsTest, TestCallable) { } { - PyObject *o = 
PyRun_String("lambda x,y,*a,**kw: x", Py_eval_input, - globals.get(), globals.get()); + PyObject *o = RunString("lambda x,y,*a,**kw: x", Py_eval_input, + globals.get(), globals.get()); ASSERT_FALSE(o == NULL); auto lambda = Take<PythonCallable>(o); auto arginfo = lambda.GetArgInfo(); @@ -713,7 +713,7 @@ class NewStyle(object): )"; PyObject *o = - PyRun_String(script, Py_file_input, globals.get(), globals.get()); + RunString(script, Py_file_input, globals.get(), globals.get()); ASSERT_FALSE(o == NULL); Take<PythonObject>(o); diff --git a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp index fbb005b..068860e 100644 --- a/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp +++ b/lldb/unittests/ScriptInterpreter/Python/PythonTestSuite.cpp @@ -22,7 +22,7 @@ void PythonTestSuite::SetUp() { // test suite. Py_InitializeEx(0); m_gil_state = PyGILState_Ensure(); - PyRun_SimpleString("import sys"); + python::RunSimpleString("import sys"); } void PythonTestSuite::TearDown() { diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp index 805651ed..b993b82 100644 --- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp +++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/ExprCXX.h" +#include "llvm/IR/GlobalValue.h" #include "gtest/gtest.h" using namespace clang; @@ -1118,3 +1119,130 @@ TEST_F(TestTypeSystemClang, AddMethodToCXXRecordType_ParmVarDecls) { EXPECT_EQ(method_it->getParamDecl(0)->getDeclContext(), *method_it); EXPECT_EQ(method_it->getParamDecl(1)->getDeclContext(), *method_it); } + +TEST_F(TestTypeSystemClang, AsmLabel_CtorDtor) { + // Tests TypeSystemClang::DeclGetMangledName for constructors/destructors + // with and without AsmLabels. 
+ + llvm::StringRef class_name = "S"; + CompilerType t = clang_utils::createRecord(*m_ast, class_name); + m_ast->StartTagDeclarationDefinition(t); + + CompilerType return_type = m_ast->GetBasicType(lldb::eBasicTypeVoid); + const bool is_virtual = false; + const bool is_static = false; + const bool is_inline = false; + const bool is_explicit = true; + const bool is_attr_used = false; + const bool is_artificial = false; + + CompilerType function_type = + m_ast->CreateFunctionType(return_type, {}, + /*variadic=*/false, /*quals*/ 0U); + auto *ctor_nolabel = m_ast->AddMethodToCXXRecordType( + t.GetOpaqueQualType(), "S", /*asm_label=*/{}, function_type, + lldb::AccessType::eAccessPublic, is_virtual, is_static, is_inline, + is_explicit, is_attr_used, is_artificial); + + auto *dtor_nolabel = m_ast->AddMethodToCXXRecordType( + t.GetOpaqueQualType(), "~S", /*asm_label=*/{}, function_type, + lldb::AccessType::eAccessPublic, is_virtual, is_static, is_inline, + is_explicit, is_attr_used, is_artificial); + + auto *ctor = m_ast->AddMethodToCXXRecordType( + t.GetOpaqueQualType(), "S", /*asm_label=*/"$__lldb_func:0x0:0x0:S", + function_type, lldb::AccessType::eAccessPublic, is_virtual, is_static, + is_inline, is_explicit, is_attr_used, is_artificial); + + auto *dtor = m_ast->AddMethodToCXXRecordType( + t.GetOpaqueQualType(), "~S", /*asm_label=*/"$__lldb_func:0x0:0x0:~S", + function_type, lldb::AccessType::eAccessPublic, is_virtual, is_static, + is_inline, is_explicit, is_attr_used, is_artificial); + + m_ast->CompleteTagDeclarationDefinition(t); + + ASSERT_TRUE(ctor_nolabel); + ASSERT_TRUE(dtor_nolabel); + ASSERT_TRUE(ctor); + ASSERT_TRUE(dtor); + +#ifdef _WIN32 + EXPECT_STREQ(m_ast->DeclGetMangledName(ctor_nolabel).GetCString(), + "??0S@@QEAA@XZ"); + EXPECT_STREQ(m_ast->DeclGetMangledName(dtor_nolabel).GetCString(), + "??_DS@@QEAAXXZ"); +#else + EXPECT_STREQ(m_ast->DeclGetMangledName(ctor_nolabel).GetCString(), + "_ZN1SC1Ev"); + 
EXPECT_STREQ(m_ast->DeclGetMangledName(dtor_nolabel).GetCString(), + "_ZN1SD1Ev"); +#endif + + EXPECT_STREQ(llvm::GlobalValue::dropLLVMManglingEscape( + m_ast->DeclGetMangledName(ctor).GetStringRef()) + .data(), + "$__lldb_func:0x0:0x0:S"); + EXPECT_STREQ(llvm::GlobalValue::dropLLVMManglingEscape( + m_ast->DeclGetMangledName(dtor).GetStringRef()) + .data(), + "$__lldb_func:0x0:0x0:~S"); +} + +struct AsmLabelTestCase { + llvm::StringRef mangled; + llvm::StringRef expected; +}; + +class TestTypeSystemClangAsmLabel + : public testing::TestWithParam<AsmLabelTestCase> { +public: + SubsystemRAII<FileSystem, HostInfo> subsystems; + + void SetUp() override { + m_holder = + std::make_unique<clang_utils::TypeSystemClangHolder>("test ASTContext"); + m_ast = m_holder->GetAST(); + } + + void TearDown() override { + m_ast = nullptr; + m_holder.reset(); + } + +protected: + TypeSystemClang *m_ast = nullptr; + std::unique_ptr<clang_utils::TypeSystemClangHolder> m_holder; +}; + +static AsmLabelTestCase g_asm_label_test_cases[] = { + {/*mangled=*/"$__lldb_func:0x0:0x0:_Z3foov", + /*expected=*/"_Z3foov"}, + {/*mangled=*/"$__lldb_func:0x0:0x0:foo", + /*expected=*/"$__lldb_func:0x0:0x0:foo"}, + {/*mangled=*/"foo", + /*expected=*/"foo"}, + {/*mangled=*/"_Z3foov", + /*expected=*/"_Z3foov"}, + {/*mangled=*/"$__lldb_func:", + /*expected=*/"$__lldb_func:"}, +}; + +TEST_P(TestTypeSystemClangAsmLabel, DeclGetMangledName) { + const auto &[mangled, expected] = GetParam(); + + CompilerType int_type = m_ast->GetBasicType(lldb::eBasicTypeInt); + clang::TranslationUnitDecl *TU = m_ast->GetTranslationUnitDecl(); + + // Prepare the declarations/types we need for the template. 
+ CompilerType clang_type = m_ast->CreateFunctionType(int_type, {}, false, 0U); + FunctionDecl *func = m_ast->CreateFunctionDeclaration( + TU, OptionalClangModuleID(), "foo", clang_type, StorageClass::SC_None, + false, /*asm_label=*/mangled); + + ASSERT_EQ(llvm::GlobalValue::dropLLVMManglingEscape( + m_ast->DeclGetMangledName(func).GetStringRef()), + expected); +} + +INSTANTIATE_TEST_SUITE_P(AsmLabelTests, TestTypeSystemClangAsmLabel, + testing::ValuesIn(g_asm_label_test_cases)); diff --git a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst index 95ae4f7..ba670d3 100644 --- a/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst +++ b/llvm/docs/AMDGPUDwarfExtensionsForHeterogeneousDebugging.rst @@ -1187,7 +1187,7 @@ There are five kinds of location storage: operations. It would specify the debugger information entry and byte offset provided by the operations. -*Location descriptions are a language independent representation of addressing +*Location descriptions are a language-independent representation of addressing rules.* * *They can be the result of evaluating a debugger information entry attribute @@ -1523,8 +1523,8 @@ expression. states that relocation of references from one executable or shared object file to another must be performed by the consumer. But given that DR is defined as an offset in a ``.debug_info`` section this seems impossible. - If DR was defined as an implementation defined value, then the consumer - could choose to interpret the value in an implementation defined manner to + If DR was defined as an implementation-defined value, then the consumer + could choose to interpret the value in an implementation-defined manner to reference a debug information in another executable or shared object. 
In ELF the ``.debug_info`` section is in a non-\ ``PT_LOAD`` segment so @@ -4188,7 +4188,7 @@ The register rules are: conversion as the bit contents of the register is simply interpreted as a value of the address. - GDB has a per register hook that allows a target specific conversion on a + GDB has a per register hook that allows a target-specific conversion on a register by register basis. It defaults to truncation of bigger registers, and to actually reading bytes from the next register (or reads out of bounds for the last register) for smaller registers. There are no GDB tests that diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index c3d4833..5343d66 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1890,7 +1890,7 @@ The AMDGPU backend supports the following calling conventions: AMDGPU MCExpr ------------- -As part of the AMDGPU MC layer, AMDGPU provides the following target specific +As part of the AMDGPU MC layer, AMDGPU provides the following target-specific ``MCExpr``\s. .. table:: AMDGPU MCExpr types: diff --git a/llvm/docs/CodeGenerator.rst b/llvm/docs/CodeGenerator.rst index 020eb09..8260b5c 100644 --- a/llvm/docs/CodeGenerator.rst +++ b/llvm/docs/CodeGenerator.rst @@ -323,7 +323,7 @@ provide one of these objects through the ``getJITInfo`` method. Machine code description classes ================================ -At the high-level, LLVM code is translated to a machine specific representation +At the high-level, LLVM code is translated to a machine-specific representation formed out of :raw-html:`<tt>` `MachineFunction`_ :raw-html:`</tt>`, :raw-html:`<tt>` `MachineBasicBlock`_ :raw-html:`</tt>`, and :raw-html:`<tt>` `MachineInstr`_ :raw-html:`</tt>` instances (defined in @@ -462,7 +462,7 @@ code: ret This approach is extremely general (if it can handle the X86 architecture, it -can handle anything!) and allows all of the target specific knowledge about the +can handle anything!) 
and allows all of the target-specific knowledge about the instruction stream to be isolated in the instruction selector. Note that physical registers should have a short lifetime for good code generation, and all physical registers are assumed dead on entry to and exit from basic blocks @@ -634,7 +634,7 @@ file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation that prints out a directive for each method (e.g. ``EmitValue -> .byte``), but MCObjectStreamer implements a full assembler. -For target specific directives, the MCStreamer has a MCTargetStreamer instance. +For target-specific directives, the MCStreamer has a MCTargetStreamer instance. Each target that needs it defines a class that inherits from it and is a lot like MCStreamer itself: It has one method per directive and two classes that inherit from it, a target object streamer and a target asm streamer. The target diff --git a/llvm/docs/CommandGuide/llvm-bcanalyzer.rst b/llvm/docs/CommandGuide/llvm-bcanalyzer.rst index 8f15e03..1e0b581 100644 --- a/llvm/docs/CommandGuide/llvm-bcanalyzer.rst +++ b/llvm/docs/CommandGuide/llvm-bcanalyzer.rst @@ -14,7 +14,7 @@ DESCRIPTION The :program:`llvm-bcanalyzer` command is a small utility for analyzing bitcode files. The tool reads a bitcode file (such as generated with the :program:`llvm-as` tool) and produces a statistical report on the contents of -the bitcode file. The tool can also dump a low level but human readable +the bitcode file. The tool can also dump a low level but human-readable version of the bitcode file. This tool is probably not of much interest or utility except for those working directly with the bitcode file format. Most LLVM users can just ignore this tool. @@ -30,7 +30,7 @@ OPTIONS .. option:: --dump - Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human readable + Causes :program:`llvm-bcanalyzer` to dump the bitcode in a human-readable format. 
This format is significantly different from LLVM assembly and provides details about the encoding of the bitcode file. diff --git a/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst b/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst index 1264f80..6a4e348 100644 --- a/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst +++ b/llvm/docs/CommandGuide/llvm-debuginfo-analyzer.rst @@ -14,7 +14,7 @@ DESCRIPTION ----------- :program:`llvm-debuginfo-analyzer` parses debug and text sections in binary object files and prints their contents in a logical view, which -is a human readable representation that closely matches the structure +is a human-readable representation that closely matches the structure of the original user source code. Supported object file formats include ELF, Mach-O, WebAssembly, PDB and COFF. diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index 25e8969..5996026 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -106,7 +106,7 @@ properly. using the loop repetition mode. :program:`llvm-exegesis` needs to keep track of the current loop iteration within the loop repetition mode in a performant manner (i.e., no memory accesses), and uses a register to do this. This register - has an architecture specific default (e.g., `R8` on X86), but this might conflict + has an architecture-specific default (e.g., `R8` on X86), but this might conflict with some snippets. This annotation allows changing the register to prevent interference between the loop index register and the snippet. 
diff --git a/llvm/docs/CommandGuide/llvm-ifs.rst b/llvm/docs/CommandGuide/llvm-ifs.rst index 1fe81c2..e3582b3 100644 --- a/llvm/docs/CommandGuide/llvm-ifs.rst +++ b/llvm/docs/CommandGuide/llvm-ifs.rst @@ -11,7 +11,7 @@ SYNOPSIS DESCRIPTION ----------- -:program:`llvm-ifs` is a tool that jointly produces human readable text-based +:program:`llvm-ifs` is a tool that jointly produces human-readable text-based stubs (.ifs files) for shared objects and linkable shared object stubs (.so files) from either ELF shared objects or text-based stubs. The text-based stubs is useful for monitoring ABI changes of the shared object. The linkable diff --git a/llvm/docs/CommandGuide/llvm-locstats.rst b/llvm/docs/CommandGuide/llvm-locstats.rst index 3186566..7f436c1 100644 --- a/llvm/docs/CommandGuide/llvm-locstats.rst +++ b/llvm/docs/CommandGuide/llvm-locstats.rst @@ -13,7 +13,7 @@ DESCRIPTION :program:`llvm-locstats` works like a wrapper around :program:`llvm-dwarfdump`. It parses :program:`llvm-dwarfdump` statistics regarding debug location by -pretty printing it in a more human readable way. +pretty printing it in a more human-readable way. The line 0% shows the number and the percentage of DIEs with no location information, but the line 100% shows the information for DIEs where there is diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst index bea1931..1daae5d 100644 --- a/llvm/docs/CommandGuide/llvm-mca.rst +++ b/llvm/docs/CommandGuide/llvm-mca.rst @@ -241,7 +241,7 @@ option specifies "``-``", then the output will also be sent to standard output. .. option:: -disable-cb Force usage of the generic CustomBehaviour and InstrPostProcess classes rather - than using the target specific implementation. The generic classes never + than using the target-specific implementation. The generic classes never detect any custom hazards or make any post processing modifications to instructions. 
@@ -1125,9 +1125,9 @@ CustomBehaviour class can be used in these cases to enforce proper instruction modeling (often by customizing data dependencies and detecting hazards that :program:`llvm-mca` has no way of knowing about). -:program:`llvm-mca` comes with one generic and multiple target specific +:program:`llvm-mca` comes with one generic and multiple target-specific CustomBehaviour classes. The generic class will be used if the ``-disable-cb`` -flag is used or if a target specific CustomBehaviour class doesn't exist for +flag is used or if a target-specific CustomBehaviour class doesn't exist for that target. (The generic class does nothing.) Currently, the CustomBehaviour class is only a part of the in-order pipeline, but there are plans to add it to the out-of-order pipeline in the future. @@ -1141,7 +1141,7 @@ if you don't know the exact number and a value of 0 represents no stall). If you'd like to add a CustomBehaviour class for a target that doesn't already have one, refer to an existing implementation to see how to set it -up. The classes are implemented within the target specific backend (for +up. The classes are implemented within the target-specific backend (for example `/llvm/lib/Target/AMDGPU/MCA/`) so that they can access backend symbols. Instrument Manager @@ -1177,12 +1177,12 @@ classes (MCSubtargetInfo, MCInstrInfo, etc.), please add it to the AND requires unexposed backend symbols or functionality, you can define it in the `/lib/Target/<TargetName>/MCA/` directory. -To enable this target specific View, you will have to use this target's +To enable this target-specific View, you will have to use this target's CustomBehaviour class to override the `CustomBehaviour::getViews()` methods. There are 3 variations of these methods based on where you want your View to appear in the output: `getStartViews()`, `getPostInstrInfoViews()`, and `getEndViews()`. 
These methods returns a vector of Views so you will want to -return a vector containing all of the target specific Views for the target in +return a vector containing all of the target-specific Views for the target in question. Because these target specific (and backend dependent) Views require the diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst index b2c0457..0b1cd02 100644 --- a/llvm/docs/CommandGuide/llvm-profdata.rst +++ b/llvm/docs/CommandGuide/llvm-profdata.rst @@ -338,7 +338,7 @@ OPTIONS Instruct the profile dumper to show profile counts in the text format of the instrumentation-based profile data representation. By default, the profile - information is dumped in a more human readable form (also in text) with + information is dumped in a more human-readable form (also in text) with annotations. .. option:: --topn=<n> diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index 2da1b24..fb86a69 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -371,7 +371,7 @@ OPTIONS * Prints an address's debug-data discriminator when it is non-zero. One way to produce discriminators is to compile with clang's -fdebug-info-for-profiling. - ``JSON`` style provides a machine readable output in JSON. If addresses are + ``JSON`` style provides a machine-readable output in JSON. If addresses are supplied via stdin, the output JSON will be a series of individual objects. Otherwise, all results will be contained in a single array. @@ -444,7 +444,7 @@ OPTIONS .. option:: --pretty-print, -p - Print human readable output. If :option:`--inlining` is specified, the + Print human-readable output. If :option:`--inlining` is specified, the enclosing scope is prefixed by (inlined by). For JSON output, the option will cause JSON to be indented and split over new lines. Otherwise, the JSON output will be printed in a compact form. 
diff --git a/llvm/docs/CommandGuide/opt.rst b/llvm/docs/CommandGuide/opt.rst index f067f62..da93b8e 100644 --- a/llvm/docs/CommandGuide/opt.rst +++ b/llvm/docs/CommandGuide/opt.rst @@ -46,12 +46,12 @@ OPTIONS Write output in LLVM intermediate language (instead of bitcode). -.. option:: -{passname} +.. option:: -passes=<string> - :program:`opt` provides the ability to run any of LLVM's optimization or - analysis passes in any order. The :option:`-help` option lists all the passes - available. The order in which the options occur on the command line are the - order in which they are executed (within pass constraints). + A textual (comma-separated) description of the pass pipeline, + e.g., ``-passes="sroa,instcombine"``. See + `invoking opt <../NewPassManager.html#invoking-opt>`_ for more details on the + pass pipeline syntax. .. option:: -strip-debug diff --git a/llvm/docs/DirectX/DXContainer.rst b/llvm/docs/DirectX/DXContainer.rst index 4ace8a1..17452d9 100644 --- a/llvm/docs/DirectX/DXContainer.rst +++ b/llvm/docs/DirectX/DXContainer.rst @@ -280,7 +280,7 @@ elements are: This represents ``f5`` in the source. The LLVM ``obj2yaml`` tool can parse this data out of the PSV and present it in -human readable YAML. For the example above it produces the output: +human-readable YAML. For the example above it produces the output: .. code-block:: YAML diff --git a/llvm/docs/Frontend/PerformanceTips.rst b/llvm/docs/Frontend/PerformanceTips.rst index 4baf127..b81df70 100644 --- a/llvm/docs/Frontend/PerformanceTips.rst +++ b/llvm/docs/Frontend/PerformanceTips.rst @@ -35,7 +35,7 @@ The Basics ^^^^^^^^^^^ #. Make sure that your Modules contain both a data layout specification and - target triple. Without these pieces, non of the target specific optimization + target triple. Without these pieces, non of the target-specific optimization will be enabled. This can have a major effect on the generated code quality. #. 
For each function or global emitted, use the most private linkage type diff --git a/llvm/docs/FuzzingLLVM.rst b/llvm/docs/FuzzingLLVM.rst index 6b32eea..a0355d7 100644 --- a/llvm/docs/FuzzingLLVM.rst +++ b/llvm/docs/FuzzingLLVM.rst @@ -128,7 +128,7 @@ llvm-mc-assemble-fuzzer ----------------------- A |generic fuzzer| that fuzzes the MC layer's assemblers by treating inputs as -target specific assembly. +target-specific assembly. Note that this fuzzer has an unusual command line interface which is not fully compatible with all of libFuzzer's features. Fuzzer arguments must be passed diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.rst index e4dbb64b..8d0adf3 100644 --- a/llvm/docs/GettingStarted.rst +++ b/llvm/docs/GettingStarted.rst @@ -919,11 +919,11 @@ the `Command Guide <CommandGuide/index.html>`_. ``llvm-as`` - The assembler transforms the human readable LLVM assembly to LLVM bitcode. + The assembler transforms the human-readable LLVM assembly to LLVM bitcode. ``llvm-dis`` - The disassembler transforms the LLVM bitcode to human readable LLVM assembly. + The disassembler transforms the LLVM bitcode to human-readable LLVM assembly. ``llvm-link`` diff --git a/llvm/docs/GlobalISel/GMIR.rst b/llvm/docs/GlobalISel/GMIR.rst index 633dfb8..be7e677 100644 --- a/llvm/docs/GlobalISel/GMIR.rst +++ b/llvm/docs/GlobalISel/GMIR.rst @@ -26,7 +26,7 @@ Generic Machine Instructions Reference. Whereas MIR deals largely in Target Instructions and only has a small set of -target independent opcodes such as ``COPY``, ``PHI``, and ``REG_SEQUENCE``, +target-independent opcodes such as ``COPY``, ``PHI``, and ``REG_SEQUENCE``, gMIR defines a rich collection of ``Generic Opcodes`` which are target independent and describe operations which are typically supported by targets. One example is ``G_ADD`` which is the generic opcode for an integer addition. 
diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index 4816094..eefd76d 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -1105,7 +1105,7 @@ G_TRAP, G_DEBUGTRAP, G_UBSANTRAP ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Represents :ref:`llvm.trap <llvm.trap>`, :ref:`llvm.debugtrap <llvm.debugtrap>` -and :ref:`llvm.ubsantrap <llvm.ubsantrap>` that generate a target dependent +and :ref:`llvm.ubsantrap <llvm.ubsantrap>` that generate a target-dependent trap instructions. .. code-block:: none diff --git a/llvm/docs/GlobalISel/Pipeline.rst b/llvm/docs/GlobalISel/Pipeline.rst index 01bd4df..b9085e8 100644 --- a/llvm/docs/GlobalISel/Pipeline.rst +++ b/llvm/docs/GlobalISel/Pipeline.rst @@ -80,7 +80,7 @@ Combiner alternatives but Combiners can also focus on code size or other metrics. Additional passes such as these can be inserted to support higher optimization -levels or target specific needs. A likely pipeline is: +levels or target-specific needs. A likely pipeline is: .. image:: pipeline-overview-with-combiners.png diff --git a/llvm/docs/HowToUpdateDebugInfo.rst b/llvm/docs/HowToUpdateDebugInfo.rst index 915e289..ca420e7 100644 --- a/llvm/docs/HowToUpdateDebugInfo.rst +++ b/llvm/docs/HowToUpdateDebugInfo.rst @@ -499,7 +499,7 @@ a JSON file as follows: $ opt -verify-debuginfo-preserve -verify-di-preserve-export=sample.json -pass-to-test sample.ll and then use the ``llvm/utils/llvm-original-di-preservation.py`` script -to generate an HTML page with the issues reported in a more human readable form +to generate an HTML page with the issues reported in a more human-readable form as follows: .. code-block:: bash diff --git a/llvm/docs/JITLink.rst b/llvm/docs/JITLink.rst index 8902712..370281b 100644 --- a/llvm/docs/JITLink.rst +++ b/llvm/docs/JITLink.rst @@ -1072,7 +1072,7 @@ Major outstanding projects include: * Refactor architecture support to maximize sharing across formats. 
- All formats should be able to share the bulk of the architecture specific + All formats should be able to share the bulk of the architecture-specific code (especially relocations) for each supported architecture. * Refactor ELF link graph construction. diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 28746bf..2a8f0af 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -26,7 +26,7 @@ readable assembly language representation. This allows LLVM to provide a powerful intermediate representation for efficient compiler transformations and analysis, while providing a natural means to debug and visualize the transformations. The three different forms of LLVM are -all equivalent. This document describes the human readable +all equivalent. This document describes the human-readable representation and notation. The LLVM representation aims to be light-weight and low-level while @@ -21289,7 +21289,7 @@ Semantics: On some architectures the address of the code to be executed needs to be different than the address where the trampoline is actually stored. This intrinsic returns the executable address corresponding to ``tramp`` -after performing the required machine specific adjustments. The pointer +after performing the required machine-specific adjustments. The pointer returned can then be :ref:`bitcast and executed <int_trampoline>`. @@ -29382,7 +29382,7 @@ None. Semantics: """""""""" -This intrinsic is lowered to the target dependent trap instruction. If +This intrinsic is lowered to the target-dependent trap instruction. If the target does not have a trap instruction, this intrinsic will be lowered to a call of the ``abort()`` function. diff --git a/llvm/docs/Lexicon.rst b/llvm/docs/Lexicon.rst index 1d4894f..05315a8 100644 --- a/llvm/docs/Lexicon.rst +++ b/llvm/docs/Lexicon.rst @@ -192,7 +192,7 @@ L **LSDA** Language Specific Data Area. C++ "zero cost" unwinding is built on top a generic unwinding mechanism. 
As the unwinder walks each frame, it calls - a "personality" function to do language specific analysis. Each function's + a "personality" function to do language-specific analysis. Each function's FDE points to an optional LSDA which is passed to the personality function. For C++, the LSDA contain info about the type and location of catch statements in that function. diff --git a/llvm/docs/MIRLangRef.rst b/llvm/docs/MIRLangRef.rst index b4b59db..a505c1e 100644 --- a/llvm/docs/MIRLangRef.rst +++ b/llvm/docs/MIRLangRef.rst @@ -12,7 +12,7 @@ Introduction ============ This document is a reference manual for the Machine IR (MIR) serialization -format. MIR is a human readable serialization format that is used to represent +format. MIR is a human-readable serialization format that is used to represent LLVM's :ref:`machine specific intermediate representation <machine code representation>`. @@ -221,7 +221,7 @@ Machine Instructions Format Reference ===================================== The machine basic blocks and their instructions are represented using a custom, -human readable serialization language. This language is used in the +human-readable serialization language. This language is used in the `YAML block literal string`_ that corresponds to the machine function's body. A source string that uses this language contains a list of machine basic diff --git a/llvm/docs/PDB/CodeViewTypes.rst b/llvm/docs/PDB/CodeViewTypes.rst index 7a93ebe..996d8f9 100644 --- a/llvm/docs/PDB/CodeViewTypes.rst +++ b/llvm/docs/PDB/CodeViewTypes.rst @@ -123,7 +123,7 @@ The ``Size`` field of the Attributes bitmask is a 1-byte value indicating the pointer size. For example, a `void*` would have a size of either 4 or 8 depending on the target architecture. On the other hand, if ``Mode`` indicates that this is a pointer to member function or pointer to data member, then the size can be any -implementation defined number. +implementation-defined number. 
The ``Member Ptr Info`` field of the ``LF_POINTER`` record is only present if the attributes indicate that this is a pointer to member. diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 1f563fb..fdefc53 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -131,9 +131,23 @@ Extensions The SPIR-V backend supports a variety of `extensions <https://github.com/KhronosGroup/SPIRV-Registry/tree/main/extensions>`_ that enable or enhance features beyond the core SPIR-V specification. -These extensions can be enabled using the ``-spirv-extensions`` option -followed by the name of the extension(s) you wish to enable. Below is a -list of supported SPIR-V extensions, sorted alphabetically by their extension names: +The enabled extensions can be controlled using the ``-spirv-ext`` option followed by a list of +extensions to enable or disable, each prefixed with ``+`` or ``-``, respectively. + +To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use: + +``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers`` + +To enable all extensions, use the following option: +``-spirv-ext=all`` + +To enable all KHR extensions, use the following option: +``-spirv-ext=khr`` + +To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example: +``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers`` + +Below is a list of supported SPIR-V extensions, sorted alphabetically by their extension names: .. list-table:: Supported SPIR-V Extensions :widths: 50 150 @@ -220,16 +234,6 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na * - ``SPV_KHR_float_controls2`` - Adds ability to specify the floating-point environment in shaders. It can be used on whole modules and individual instructions. 
-To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use: - -``-spirv-ext=+SPV_EXT_shader_atomic_float_add,+SPV_INTEL_arbitrary_precision_integers`` - -To enable all extensions, use the following option: -``-spirv-ext=all`` - -To enable all extensions except specified, specify ``all`` followed by a list of disallowed extensions. For example: -``-spirv-ext=all,-SPV_INTEL_arbitrary_precision_integers`` - SPIR-V representation in LLVM IR ================================ diff --git a/llvm/docs/SymbolizerMarkupFormat.rst b/llvm/docs/SymbolizerMarkupFormat.rst index d5b17d7..75ead44 100644 --- a/llvm/docs/SymbolizerMarkupFormat.rst +++ b/llvm/docs/SymbolizerMarkupFormat.rst @@ -315,7 +315,7 @@ Trigger elements ================ These elements cause an external action and will be presented to the user in a -human readable form. Generally they trigger an external action to occur that +human-readable form. Generally they trigger an external action to occur that results in a linkable page. The link or some other informative information about the external action can then be presented to the user. diff --git a/llvm/docs/WritingAnLLVMBackend.rst b/llvm/docs/WritingAnLLVMBackend.rst index 3c5d594..cab6471 100644 --- a/llvm/docs/WritingAnLLVMBackend.rst +++ b/llvm/docs/WritingAnLLVMBackend.rst @@ -150,7 +150,7 @@ any other naming scheme will confuse ``llvm-config`` and produce a lot of To make your target actually do something, you need to implement a subclass of ``TargetMachine``. This implementation should typically be in the file ``lib/Target/DummyTargetMachine.cpp``, but any file in the ``lib/Target`` -directory will be built and should work. To use LLVM's target independent code +directory will be built and should work. 
To use LLVM's target-independent code generator, you should do what all current machine backends do: create a subclass of ``CodeGenTargetMachineImpl``. (To create a target from scratch, create a subclass of ``TargetMachine``.) @@ -1671,7 +1671,7 @@ For example in ``SparcTargetAsmInfo.cpp``: } The X86 assembly printer implementation (``X86TargetAsmInfo``) is an example -where the target specific ``TargetAsmInfo`` class uses an overridden methods: +where the target-specific ``TargetAsmInfo`` class uses an overridden methods: ``ExpandInlineAsm``. A target-specific implementation of ``AsmPrinter`` is written in diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.rst index 9c2c383..eec9887 100644 --- a/llvm/docs/WritingAnLLVMPass.rst +++ b/llvm/docs/WritingAnLLVMPass.rst @@ -431,7 +431,7 @@ The ``print`` method virtual void print(llvm::raw_ostream &O, const Module *M) const; The ``print`` method must be implemented by "analyses" in order to print a -human readable version of the analysis results. This is useful for debugging +human-readable version of the analysis results. This is useful for debugging an analysis itself, as well as for other people to figure out how an analysis works. Use the opt ``-analyze`` argument to invoke this method. diff --git a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl10.rst b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl10.rst index 7b9105b..a739936 100644 --- a/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl10.rst +++ b/llvm/docs/tutorial/MyFirstLanguageFrontend/LangImpl10.rst @@ -129,7 +129,7 @@ course, C source code is not actually portable in general either - ever port a really old application from 32- to 64-bits?). The problem with C (again, in its full generality) is that it is heavily -laden with target specific assumptions. As one simple example, the +laden with target-specific assumptions. 
As one simple example, the preprocessor often destructively removes target-independence from the code when it processes the input text: diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index af6e534..92304ed 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -180,10 +180,12 @@ public: const SmallVectorImpl<Instruction *> &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + MemoryDepChecker(PredicatedScalarEvolution &PSE, AssumptionCache *AC, + DominatorTree *DT, const Loop *L, const DenseMap<Value *, const SCEV *> &SymbolicStrides, unsigned MaxTargetVectorWidthInBits) - : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides), + : PSE(PSE), AC(AC), DT(DT), InnermostLoop(L), + SymbolicStrides(SymbolicStrides), MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {} /// Register the location (instructions are given increasing numbers) @@ -288,6 +290,15 @@ public: return PointerBounds; } + DominatorTree *getDT() const { + assert(DT && "requested DT, but it is not available"); + return DT; + } + AssumptionCache *getAC() const { + assert(AC && "requested AC, but it is not available"); + return AC; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -296,6 +307,10 @@ private: /// example we might assume a unit stride for a pointer in order to prove /// that a memory access is strided and doesn't wrap. 
PredicatedScalarEvolution &PSE; + + AssumptionCache *AC; + DominatorTree *DT; + const Loop *InnermostLoop; /// Reference to map of pointer values to @@ -670,7 +685,7 @@ public: LLVM_ABI LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI, + DominatorTree *DT, LoopInfo *LI, AssumptionCache *AC, bool AllowPartial = false); /// Return true we can analyze the memory accesses in the loop and there are @@ -922,7 +937,8 @@ LLVM_ABI std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, - std::pair<const SCEV *, const SCEV *>> *PointerBounds); + std::pair<const SCEV *, const SCEV *>> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC); class LoopAccessInfoManager { /// The cache. @@ -935,12 +951,13 @@ class LoopAccessInfoManager { LoopInfo &LI; TargetTransformInfo *TTI; const TargetLibraryInfo *TLI = nullptr; + AssumptionCache *AC; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, - const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {} + const TargetLibraryInfo *TLI, AssumptionCache *AC) + : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI), AC(AC) {} LLVM_ABI const LoopAccessInfo &getInfo(Loop &L, bool AllowPartial = false); diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index efda7eb..5a2aee2 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1303,8 +1303,8 @@ protected: SchedBoundary Top; SchedBoundary Bot; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; /// Candidate last picked from Top boundary. 
SchedCandidate TopCand; @@ -1346,8 +1346,8 @@ protected: /// Candidate last picked from Bot boundary. SchedCandidate BotCand; - ClusterInfo *TopCluster; - ClusterInfo *BotCluster; + unsigned TopClusterID; + unsigned BotClusterID; public: PostGenericScheduler(const MachineSchedContext *C) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 2967532..be90250 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -578,6 +578,18 @@ m_InsertSubvector(const LHS &Base, const RHS &Sub, const IDX &Idx) { return TernaryOpc_match<LHS, RHS, IDX>(ISD::INSERT_SUBVECTOR, Base, Sub, Idx); } +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCC(const LTy &L, const RTy &R, const TTy &T, const FTy &F, + const CCTy &CC) { + return m_Node(ISD::SELECT_CC, L, R, T, F, CC); +} + +template <typename LTy, typename RTy, typename TTy, typename FTy, typename CCTy> +inline auto m_SelectCCLike(const LTy &L, const RTy &R, const TTy &T, + const FTy &F, const CCTy &CC) { + return m_AnyOf(m_Select(m_SetCC(L, R, CC), T, F), m_SelectCC(L, R, T, F, CC)); +} + // === Binary operations === template <typename LHS_P, typename RHS_P, bool Commutable = false, bool ExcludeChain = false> diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h index 3a0a31b..122b7be 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -240,6 +240,11 @@ class TargetRegisterInfo; typedef SmallSet<SUnit *, 8> ClusterInfo; constexpr unsigned InvalidClusterId = ~0u; + /// Return whether the input cluster ID's are the same and valid. + inline bool isTheSameCluster(unsigned A, unsigned B) { + return A != InvalidClusterId && A == B; + } + /// Scheduling unit. This is a node in the scheduling DAG. 
class SUnit { private: diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index f11eccc..79f25bb 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -1460,7 +1460,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { ]; let allowedOnceClauses = [ VersionedClause<OMPC_Collapse>, - VersionedClause<OMPC_If>, + VersionedClause<OMPC_If, 50>, VersionedClause<OMPC_NoWait>, VersionedClause<OMPC_Order, 50>, VersionedClause<OMPC_Ordered>, diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 7265a76..e85f986 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -662,10 +662,17 @@ class AMDGPUCvtScaleF32ToFP6BF6Intrinsic<LLVMType DstTy, LLVMType Src0Ty, LLVMTy def int_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_fp6_f16">; def int_amdgcn_cvt_scalef32_pk32_bf6_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_pk32_bf6_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp8_bf16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_bf8_bf16">; def int_amdgcn_cvt_scalef32_pk32_fp6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_fp6_bf16">; def int_amdgcn_cvt_scalef32_pk32_bf6_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_pk32_bf6_bf16">; -def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">; -def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">; +def 
int_amdgcn_cvt_scalef32_pk8_fp8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp8_f16">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_bf8_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp8_f32">; +def int_amdgcn_cvt_scalef32_pk8_bf8_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_v2i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_bf8_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f32 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f32_ty, "cvt_scalef32_pk8_fp4_f32">; +def int_amdgcn_cvt_scalef32_pk8_fp4_f16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8f16_ty, "cvt_scalef32_pk8_fp4_f16">; +def int_amdgcn_cvt_scalef32_pk8_fp4_bf16 : AMDGPUCvtScaleF32Intrinsic<llvm_i32_ty, llvm_v8bf16_ty, "cvt_scalef32_pk8_fp4_bf16">; def int_amdgcn_cvt_scalef32_sr_pk32_bf6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32bf16_ty, "cvt_scalef32_sr_pk32_bf6_bf16">; def int_amdgcn_cvt_scalef32_sr_pk32_bf6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_bf6_f16">; @@ -674,6 +681,9 @@ def int_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16 : AMDGPUCvtScaleF32SRIntrinsic<llvm def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f16 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f16_ty, "cvt_scalef32_sr_pk32_fp6_f16">; def int_amdgcn_cvt_scalef32_sr_pk32_fp6_f32 : AMDGPUCvtScaleF32SRIntrinsic<llvm_v6i32_ty, llvm_v32f32_ty, "cvt_scalef32_sr_pk32_fp6_f32">; +def int_amdgcn_cvt_scalef32_2xpk16_fp6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_fp6_f32">; +def int_amdgcn_cvt_scalef32_2xpk16_bf6_f32 : AMDGPUCvtScaleF32ToFP6BF6Intrinsic<llvm_v6i32_ty, llvm_v16f32_ty, llvm_v16f32_ty, "cvt_scalef32_2xpk16_bf6_f32">; + class AMDGPUCvtScaleFP4FP8BF8ToF1632Intrinsic<LLVMType DstTy, string name> : DefaultAttrsIntrinsic< [DstTy], [llvm_i32_ty, 
// src @@ -3656,6 +3666,36 @@ def int_amdgcn_sat_pk4_i4_i8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_i4_i8">, def int_amdgcn_sat_pk4_u4_u8 : ClangBuiltin<"__builtin_amdgcn_sat_pk4_u4_u8">, DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i32_ty], [IntrNoMem, IntrSpeculatable]>; +// llvm.amdgcn.permlane.bcast <src0> <src1> <src2> +def int_amdgcn_permlane_bcast : ClangBuiltin<"__builtin_amdgcn_permlane_bcast">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.up <src0> <src1> <src2> +def int_amdgcn_permlane_up : ClangBuiltin<"__builtin_amdgcn_permlane_up">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.down <src0> <src1> <src2> +def int_amdgcn_permlane_down : ClangBuiltin<"__builtin_amdgcn_permlane_down">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.xor <src0> <src1> <src2> +def int_amdgcn_permlane_xor : ClangBuiltin<"__builtin_amdgcn_permlane_xor">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + +// llvm.amdgcn.permlane.idx.gen <src0> <src1> +def int_amdgcn_permlane_idx_gen : ClangBuiltin<"__builtin_amdgcn_permlane_idx_gen">, + Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; + //===----------------------------------------------------------------------===// // Special Intrinsics for backend internal use only. No frontend // should emit calls to these. 
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index e63a41f..99f975f 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -1717,6 +1717,16 @@ let TargetPrefix = "riscv" in { llvm_anyint_ty], [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (pointer, offset, mask, vl) + def int_riscv_sseg # nf # _load_mask + : DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], + !listsplat(LLVMMatchType<0>, + !add(nf, -1))), + [llvm_anyptr_ty, llvm_anyint_ty, + LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyint_ty], + [NoCapture<ArgIndex<0>>, IntrReadMem]>; + // Input: (<stored values>..., pointer, mask, vl) def int_riscv_seg # nf # _store_mask : DefaultAttrsIntrinsic<[], diff --git a/llvm/include/llvm/Support/DebugLog.h b/llvm/include/llvm/Support/DebugLog.h index 8fca2d5..a331295 100644 --- a/llvm/include/llvm/Support/DebugLog.h +++ b/llvm/include/llvm/Support/DebugLog.h @@ -61,8 +61,10 @@ namespace llvm { for (bool _c = \ (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE, LEVEL)); \ _c; _c = false) \ + for (::llvm::impl::RAIINewLineStream NewLineStream{(STREAM)}; _c; \ + _c = false) \ ::llvm::impl::raw_ldbg_ostream{ \ - ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), (STREAM)} \ + ::llvm::impl::computePrefix(TYPE, FILE, LINE, LEVEL), NewLineStream} \ .asLvalue() #define DEBUGLOG_WITH_STREAM_TYPE_AND_FILE(STREAM, LEVEL, TYPE, FILE) \ @@ -81,14 +83,15 @@ namespace llvm { namespace impl { -/// A raw_ostream that tracks `\n` and print the prefix. +/// A raw_ostream that tracks `\n` and print the prefix after each +/// newline. class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { std::string Prefix; raw_ostream &Os; - bool HasPendingNewline = true; + bool HasPendingNewline; - /// Split the line on newlines and insert the prefix before each newline. - /// Forward everything to the underlying stream. 
+ /// Split the line on newlines and insert the prefix before each + /// newline. Forward everything to the underlying stream. void write_impl(const char *Ptr, size_t Size) final { auto Str = StringRef(Ptr, Size); // Handle the initial prefix. @@ -109,22 +112,18 @@ class LLVM_ABI raw_ldbg_ostream final : public raw_ostream { } void emitPrefix() { Os.write(Prefix.c_str(), Prefix.size()); } void writeWithPrefix(StringRef Str) { - if (HasPendingNewline) { - emitPrefix(); - HasPendingNewline = false; - } + flushEol(); Os.write(Str.data(), Str.size()); } public: - explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os) - : Prefix(std::move(Prefix)), Os(Os) { + explicit raw_ldbg_ostream(std::string Prefix, raw_ostream &Os, + bool HasPendingNewline = true) + : Prefix(std::move(Prefix)), Os(Os), + HasPendingNewline(HasPendingNewline) { SetUnbuffered(); } - ~raw_ldbg_ostream() final { - flushEol(); - Os << '\n'; - } + ~raw_ldbg_ostream() final { flushEol(); } void flushEol() { if (HasPendingNewline) { emitPrefix(); @@ -135,10 +134,22 @@ public: /// Forward the current_pos method to the underlying stream. uint64_t current_pos() const final { return Os.tell(); } - /// Some of the `<<` operators expect an lvalue, so we trick the type system. + /// Some of the `<<` operators expect an lvalue, so we trick the type + /// system. raw_ldbg_ostream &asLvalue() { return *this; } }; +/// A raw_ostream that prints a newline on destruction, useful for LDBG() +class RAIINewLineStream final : public raw_ostream { + raw_ostream &Os; + +public: + RAIINewLineStream(raw_ostream &Os) : Os(Os) { SetUnbuffered(); } + ~RAIINewLineStream() { Os << '\n'; } + void write_impl(const char *Ptr, size_t Size) final { Os.write(Ptr, Size); } + uint64_t current_pos() const final { return Os.tell(); } +}; + /// Remove the path prefix from the file name. 
static LLVM_ATTRIBUTE_UNUSED constexpr const char * getShortFileName(const char *path) { diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 719c0ee..e57032a 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -6494,7 +6494,7 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> { } constexpr static const std::optional<TypeSize> HasNoAllocationSize = - std::optional<TypeSize>(TypeSize(-1, true)); + std::make_optional<TypeSize>(-1, true); LLVM_ABI static const char ID; }; diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index cba3736..43ff084 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -400,19 +400,11 @@ public: /// Returns true if the loop has exactly one uncountable early exit, i.e. an /// uncountable exit that isn't the latch block. - bool hasUncountableEarlyExit() const { - return getUncountableEdge().has_value(); - } + bool hasUncountableEarlyExit() const { return UncountableExitingBB; } /// Returns the uncountable early exiting block, if there is exactly one. BasicBlock *getUncountableEarlyExitingBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->first : nullptr; - } - - /// Returns the destination of the uncountable early exiting block, if there - /// is exactly one. - BasicBlock *getUncountableEarlyExitBlock() const { - return hasUncountableEarlyExit() ? getUncountableEdge()->second : nullptr; + return UncountableExitingBB; } /// Return true if there is store-load forwarding dependencies. @@ -473,13 +465,6 @@ public: return CountableExitingBlocks; } - /// Returns the loop edge to an uncountable exit, or std::nullopt if there - /// isn't a single such edge. 
- std::optional<std::pair<BasicBlock *, BasicBlock *>> - getUncountableEdge() const { - return UncountableEdge; - } - private: /// Return true if the pre-header, exiting and latch blocks of \p Lp and all /// its nested loops are considered legal for vectorization. These legal @@ -659,9 +644,9 @@ private: /// the exact backedge taken count is not computable. SmallVector<BasicBlock *, 4> CountableExitingBlocks; - /// Keep track of the loop edge to an uncountable exit, comprising a pair - /// of (Exiting, Exit) blocks, if there is exactly one early exit. - std::optional<std::pair<BasicBlock *, BasicBlock *>> UncountableEdge; + /// Keep track of an uncountable exiting block, if there is exactly one early + /// exit. + BasicBlock *UncountableExitingBB = nullptr; }; } // namespace llvm diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 2d52f34..dd98b62 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2679,11 +2679,12 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, case Intrinsic::nvvm_round_ftz_f: case Intrinsic::nvvm_round_f: case Intrinsic::nvvm_round_d: { - // Use APFloat implementation instead of native libm call, as some - // implementations (e.g. on PPC) do not preserve the sign of negative 0. + // nvvm_round is lowered to PTX cvt.rni, which will round to nearest + // integer, choosing even integer if source is equidistant between two + // integers, so the semantics are closer to "rint" rather than "round". bool IsFTZ = nvvm::UnaryMathIntrinsicShouldFTZ(IntrinsicID); auto V = IsFTZ ? 
FTZPreserveSign(APF) : APF; - V.roundToIntegral(APFloat::rmNearestTiesToAway); + V.roundToIntegral(APFloat::rmNearestTiesToEven); return ConstantFP::get(Ty->getContext(), V); } diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 393f264..6fc81d787 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -342,7 +342,7 @@ bool llvm::isDereferenceableAndAlignedInLoop( : SE.getConstantMaxBackedgeTakenCount(L); } const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess( - L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr); + L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC); if (isa<SCEVCouldNotCompute>(AccessStart) || isa<SCEVCouldNotCompute>(AccessEnd)) return false; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 14be385..a553533 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -23,6 +23,8 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/AssumeBundleQueries.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -208,28 +210,46 @@ static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B, /// Return true, if evaluating \p AR at \p MaxBTC cannot wrap, because \p AR at /// \p MaxBTC is guaranteed inbounds of the accessed object. 
-static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, - const SCEV *MaxBTC, - const SCEV *EltSize, - ScalarEvolution &SE, - const DataLayout &DL) { +static bool +evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, + const SCEV *MaxBTC, const SCEV *EltSize, + ScalarEvolution &SE, const DataLayout &DL, + DominatorTree *DT, AssumptionCache *AC) { auto *PointerBase = SE.getPointerBase(AR->getStart()); auto *StartPtr = dyn_cast<SCEVUnknown>(PointerBase); if (!StartPtr) return false; + const Loop *L = AR->getLoop(); bool CheckForNonNull, CheckForFreed; - uint64_t DerefBytes = StartPtr->getValue()->getPointerDereferenceableBytes( + Value *StartPtrV = StartPtr->getValue(); + uint64_t DerefBytes = StartPtrV->getPointerDereferenceableBytes( DL, CheckForNonNull, CheckForFreed); - if (CheckForNonNull || CheckForFreed) + if (DerefBytes && (CheckForNonNull || CheckForFreed)) return false; const SCEV *Step = AR->getStepRecurrence(SE); + Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); + const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes); + + // Check if we have a suitable dereferencable assumption we can use. 
+ if (!StartPtrV->canBeFreed()) { + RetainedKnowledge DerefRK = getKnowledgeValidInContext( + StartPtrV, {Attribute::Dereferenceable}, *AC, + L->getLoopPredecessor()->getTerminator(), DT); + if (DerefRK) { + DerefBytesSCEV = SE.getUMaxExpr( + DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue)); + } + } + + if (DerefBytesSCEV->isZero()) + return false; + bool IsKnownNonNegative = SE.isKnownNonNegative(Step); if (!IsKnownNonNegative && !SE.isKnownNegative(Step)) return false; - Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType()); Step = SE.getNoopOrSignExtend(Step, WiderTy); MaxBTC = SE.getNoopOrZeroExtend(MaxBTC, WiderTy); @@ -256,8 +276,7 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE); if (!EndBytes) return false; - return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, - SE.getConstant(WiderTy, DerefBytes)); + return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV); } // For negative steps check if @@ -265,15 +284,15 @@ static bool evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR, // * StartOffset <= DerefBytes. 
assert(SE.isKnownNegative(Step) && "must be known negative"); return SE.isKnownPredicate(CmpInst::ICMP_SGE, StartOffset, OffsetEndBytes) && - SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, - SE.getConstant(WiderTy, DerefBytes)); + SE.isKnownPredicate(CmpInst::ICMP_ULE, StartOffset, DerefBytesSCEV); } std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *BTC, const SCEV *MaxBTC, ScalarEvolution *SE, DenseMap<std::pair<const SCEV *, Type *>, - std::pair<const SCEV *, const SCEV *>> *PointerBounds) { + std::pair<const SCEV *, const SCEV *>> *PointerBounds, + DominatorTree *DT, AssumptionCache *AC) { std::pair<const SCEV *, const SCEV *> *PtrBoundsPair; if (PointerBounds) { auto [Iter, Ins] = PointerBounds->insert( @@ -308,8 +327,8 @@ std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess( // sets ScEnd to the maximum unsigned value for the type. Note that LAA // separately checks that accesses cannot not wrap, so unsigned max // represents an upper bound. 
- if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, - DL)) { + if (evaluatePtrAddRecAtMaxBTCWillNotWrap(AR, MaxBTC, EltSizeSCEV, *SE, DL, + DT, AC)) { ScEnd = AR->evaluateAtIteration(MaxBTC, *SE); } else { ScEnd = SE->getAddExpr( @@ -356,9 +375,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, bool NeedsFreeze) { const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); const SCEV *BTC = PSE.getBackedgeTakenCount(); - const auto &[ScStart, ScEnd] = - getStartAndEndForAccess(Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, - PSE.getSE(), &DC.getPointerBounds()); + const auto &[ScStart, ScEnd] = getStartAndEndForAccess( + Lp, PtrExpr, AccessTy, BTC, SymbolicMaxBTC, PSE.getSE(), + &DC.getPointerBounds(), DC.getDT(), DC.getAC()); assert(!isa<SCEVCouldNotCompute>(ScStart) && !isa<SCEVCouldNotCompute>(ScEnd) && "must be able to compute both start and end expressions"); @@ -1961,13 +1980,15 @@ bool MemoryDepChecker::areAccessesCompletelyBeforeOrAfter(const SCEV *Src, const SCEV *BTC = PSE.getBackedgeTakenCount(); const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); ScalarEvolution &SE = *PSE.getSE(); - const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess( - InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + const auto &[SrcStart_, SrcEnd_] = + getStartAndEndForAccess(InnermostLoop, Src, SrcTy, BTC, SymbolicMaxBTC, + &SE, &PointerBounds, DT, AC); if (isa<SCEVCouldNotCompute>(SrcStart_) || isa<SCEVCouldNotCompute>(SrcEnd_)) return false; - const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess( - InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, &SE, &PointerBounds); + const auto &[SinkStart_, SinkEnd_] = + getStartAndEndForAccess(InnermostLoop, Sink, SinkTy, BTC, SymbolicMaxBTC, + &SE, &PointerBounds, DT, AC); if (isa<SCEVCouldNotCompute>(SinkStart_) || isa<SCEVCouldNotCompute>(SinkEnd_)) return false; @@ -3002,7 +3023,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, 
ScalarEvolution *SE, const TargetTransformInfo *TTI, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI, - bool AllowPartial) + AssumptionCache *AC, bool AllowPartial) : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)), PtrRtChecking(nullptr), TheLoop(L), AllowPartial(AllowPartial) { unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max(); @@ -3012,8 +3033,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, MaxTargetVectorWidthInBits = TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) * 2; - DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides, - MaxTargetVectorWidthInBits); + DepChecker = std::make_unique<MemoryDepChecker>( + *PSE, AC, DT, L, SymbolicStrides, MaxTargetVectorWidthInBits); PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE); if (canAnalyzeLoop()) CanVecMem = analyzeLoop(AA, LI, TLI, DT); @@ -3082,7 +3103,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L, // or if it was created with a different value of AllowPartial. 
if (Inserted || It->second->hasAllowPartial() != AllowPartial) It->second = std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, - &LI, AllowPartial); + &LI, AC, AllowPartial); return *It->second; } @@ -3125,7 +3146,8 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, auto &LI = FAM.getResult<LoopAnalysis>(F); auto &TTI = FAM.getResult<TargetIRAnalysis>(F); auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI); + auto &AC = FAM.getResult<AssumptionAnalysis>(F); + return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI, &AC); } AnalysisKey LoopAccessAnalysis::Key; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index 9d5c39c..c6fa8f4 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -3676,8 +3676,8 @@ void GenericScheduler::initialize(ScheduleDAGMI *dag) { TopCand.SU = nullptr; BotCand.SU = nullptr; - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } /// Initialize the per-region scheduling policy. @@ -3988,10 +3988,14 @@ bool GenericScheduler::tryCandidate(SchedCandidate &Cand, // This is a best effort to set things up for a post-RA pass. Optimizations // like generating loads of multiple registers should ideally be done within // the scheduler pass by combining the loads during DAG postprocessing. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? 
TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -4251,24 +4255,30 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) { void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (TopCluster) { - dbgs() << " Top Cluster: "; - for (auto *N : *TopCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + TopClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (TopClusterID != InvalidClusterId) { + ClusterInfo *TopCluster = DAG->getCluster(TopClusterID); + dbgs() << " Top Cluster: "; + for (auto *N : *TopCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysReg(SU, true); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); - LLVM_DEBUG(if (BotCluster) { - dbgs() << " Bot Cluster: "; - for (auto *N : *BotCluster) - dbgs() << N->NodeNum << '\t'; - dbgs() << '\n'; + BotClusterID = SU->ParentClusterIdx; + LLVM_DEBUG({ + if (BotClusterID != InvalidClusterId) { + ClusterInfo *BotCluster = DAG->getCluster(BotClusterID); + dbgs() << " Bot Cluster: "; + for (auto *N : *BotCluster) + dbgs() << N->NodeNum << '\t'; + dbgs() << '\n'; + } }); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) @@ -4306,8 +4316,8 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { if (!Bot.HazardRec) { Bot.HazardRec = DAG->TII->CreateTargetMIHazardRecognizer(Itin, DAG); } - TopCluster = nullptr; - BotCluster = nullptr; + TopClusterID = InvalidClusterId; + BotClusterID = InvalidClusterId; } 
void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, @@ -4373,10 +4383,14 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand, return TryCand.Reason != NoCand; // Keep clustered nodes together. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; // Avoid critical resource consumption and balance the schedule. 
@@ -4575,11 +4589,11 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); - TopCluster = DAG->getCluster(SU->ParentClusterIdx); + TopClusterID = SU->ParentClusterIdx; Top.bumpNode(SU); } else { SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle()); - BotCluster = DAG->getCluster(SU->ParentClusterIdx); + BotClusterID = SU->ParentClusterIdx; Bot.bumpNode(SU); } } diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 69b9291..2400a1f 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -178,10 +178,8 @@ void RegAllocBase::cleanupFailedVReg(Register FailedReg, MCRegister PhysReg, for (MCRegAliasIterator Aliases(PhysReg, TRI, true); Aliases.isValid(); ++Aliases) { for (MachineOperand &MO : MRI->reg_operands(*Aliases)) { - if (MO.readsReg()) { + if (MO.readsReg()) MO.setIsUndef(true); - LIS->removeAllRegUnitsForPhysReg(MO.getReg()); - } } } } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index a43020e..11e869a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -331,6 +331,11 @@ namespace { return CombineTo(N, To, 2, AddTo); } + SDValue CombineTo(SDNode *N, SmallVectorImpl<SDValue> *To, + bool AddTo = true) { + return CombineTo(N, To->data(), To->size(), AddTo); + } + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); private: @@ -541,6 +546,7 @@ namespace { SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_INTERLEAVE(SDNode *N); SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitSCALAR_TO_VECTOR(SDNode *N); @@ -2021,6 +2027,7 @@ SDValue 
DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_INTERLEAVE: return visitVECTOR_INTERLEAVE(N); case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); @@ -4100,18 +4107,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // (sub x, ([v]select (uge x, y), y, 0)) -> (umin x, (sub x, y)) if (N1.hasOneUse() && hasUMin(VT)) { SDValue Y; - if (sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_Select(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero())) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETULT)), - m_Zero(), m_Deferred(Y))) || - sd_match(N1, m_VSelect(m_SetCC(m_Specific(N0), m_Value(Y), - m_SpecificCondCode(ISD::SETUGE)), - m_Deferred(Y), m_Zero()))) + auto MS0 = m_Specific(N0); + auto MVY = m_Value(Y); + auto MZ = m_Zero(); + auto MCC1 = m_SpecificCondCode(ISD::SETULT); + auto MCC2 = m_SpecificCondCode(ISD::SETUGE); + + if (sd_match(N1, m_SelectCCLike(MS0, MVY, MZ, m_Deferred(Y), MCC1)) || + sd_match(N1, m_SelectCCLike(MS0, MVY, m_Deferred(Y), MZ, MCC2)) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC1), MZ, m_Deferred(Y))) || + sd_match(N1, m_VSelect(m_SetCC(MS0, MVY, MCC2), m_Deferred(Y), MZ))) + return DAG.getNode(ISD::UMIN, DL, VT, N0, DAG.getNode(ISD::SUB, DL, VT, N0, Y)); } @@ -10616,6 +10622,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getVScale(DL, VT, C0 << C1); } + SDValue X; + APInt VS0; + + // fold (shl (X * vscale(VS0)), C1) -> (X * vscale(VS0 << C1)) + if (N1C && sd_match(N0, m_Mul(m_Value(X), m_VScale(m_ConstInt(VS0))))) { + SDNodeFlags Flags; + 
Flags.setNoUnsignedWrap(N->getFlags().hasNoUnsignedWrap() && + N0->getFlags().hasNoUnsignedWrap()); + + SDValue VScale = DAG.getVScale(DL, VT, VS0 << N1C->getAPIntValue()); + return DAG.getNode(ISD::MUL, DL, VT, X, VScale, Flags); + } + // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). APInt ShlVal; if (N0.getOpcode() == ISD::STEP_VECTOR && @@ -25282,6 +25301,28 @@ static SDValue combineConcatVectorOfShuffleAndItsOperands( return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask); } +static SDValue combineConcatVectorOfSplats(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalTypes, + bool LegalOperations) { + EVT VT = N->getValueType(0); + + // Post-legalization we can only create wider SPLAT_VECTOR operations if both + // the type and operation is legal. The Hexagon target has custom + // legalization for SPLAT_VECTOR that splits the operation into two parts and + // concatenates them. Therefore, custom lowering must also be rejected in + // order to avoid an infinite loop. + if ((LegalTypes && !TLI.isTypeLegal(VT)) || + (LegalOperations && !TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + if (!llvm::all_equal(N->op_values()) || Op0.getOpcode() != ISD::SPLAT_VECTOR) + return SDValue(); + + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, Op0.getOperand(0)); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -25405,6 +25446,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(VT, SDLoc(N), Opnds); } + if (SDValue V = + combineConcatVectorOfSplats(N, DAG, TLI, LegalTypes, LegalOperations)) + return V; + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). 
if (SDValue V = combineConcatVectorOfScalars(N, DAG)) @@ -25473,6 +25518,21 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVECTOR_INTERLEAVE(SDNode *N) { + // Check to see if all operands are identical. + if (!llvm::all_equal(N->op_values())) + return SDValue(); + + // Check to see if the identical operand is a splat. + if (!DAG.isSplatValue(N->getOperand(0))) + return SDValue(); + + // interleave splat(X), splat(X).... --> splat(X), splat(X).... + SmallVector<SDValue, 4> Ops; + Ops.append(N->op_values().begin(), N->op_values().end()); + return CombineTo(N, &Ops); +} + // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find // if the subvector can be sourced for free. static SDValue getSubVectorSrc(SDValue V, unsigned Index, EVT SubVT) { @@ -28965,13 +29025,27 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, ((N1C->isAllOnes() && CC == ISD::SETGT) || (N1C->isZero() && CC == ISD::SETLT)) && !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { - SDValue ASR = DAG.getNode( - ISD::SRA, DL, CmpOpVT, N0, - DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); - return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), + SDValue ASHR = + DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, + DAG.getShiftAmountConstant( + CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL)); + return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT), DAG.getSExtOrTrunc(CC == ISD::SETLT ? 
N3 : N2, DL, VT)); } + // Fold sign pattern select_cc setgt X, -1, 1, -1 -> or (ashr X, BW-1), 1 + if (CC == ISD::SETGT && N1C && N2C && N3C && N1C->isAllOnes() && + N2C->isOne() && N3C->isAllOnes() && + !TLI.shouldAvoidTransformToShift(CmpOpVT, + CmpOpVT.getScalarSizeInBits() - 1)) { + SDValue ASHR = + DAG.getNode(ISD::SRA, DL, CmpOpVT, N0, + DAG.getShiftAmountConstant( + CmpOpVT.getScalarSizeInBits() - 1, CmpOpVT, DL)); + return DAG.getNode(ISD::OR, DL, VT, DAG.getSExtOrTrunc(ASHR, DL, VT), + DAG.getConstant(1, DL, VT)); + } + if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG)) return S; if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG)) diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index f16963d..f1d4549 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -1012,7 +1012,7 @@ DIDerivedType *DIDerivedType::getImpl( std::optional<DIDerivedType::PtrAuthData> DIDerivedType::getPtrAuthData() const { return getTag() == dwarf::DW_TAG_LLVM_ptrauth_type - ? std::optional<PtrAuthData>(PtrAuthData(SubclassData32)) + ? std::make_optional<PtrAuthData>(SubclassData32) : std::nullopt; } diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp index 0ca6217..b00419b 100644 --- a/llvm/lib/Remarks/RemarkLinker.cpp +++ b/llvm/lib/Remarks/RemarkLinker.cpp @@ -70,8 +70,8 @@ Error RemarkLinker::link(StringRef Buffer, Format RemarkFormat) { Expected<std::unique_ptr<RemarkParser>> MaybeParser = createRemarkParserFromMeta( RemarkFormat, Buffer, - PrependPath ? std::optional<StringRef>(StringRef(*PrependPath)) - : std::optional<StringRef>()); + PrependPath ? 
std::make_optional<StringRef>(*PrependPath) + : std::nullopt); if (!MaybeParser) return MaybeParser.takeError(); diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 5e0b29f..46084c5 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -900,6 +900,30 @@ writeSignedDecimal (char *dst, int value) return dst; } +// Compute the ULP of the input using a definition from: +// Jean-Michel Muller. On the definition of ulp(x). [Research Report] RR-5504, +// LIP RR-2005-09, INRIA, LIP. 2005, pp.16. inria-00070503 +static APFloat harrisonUlp(const APFloat &X) { + const fltSemantics &Sem = X.getSemantics(); + switch (X.getCategory()) { + case APFloat::fcNaN: + return APFloat::getQNaN(Sem); + case APFloat::fcInfinity: + return APFloat::getInf(Sem); + case APFloat::fcZero: + return APFloat::getSmallest(Sem); + case APFloat::fcNormal: + break; + } + if (X.isDenormal() || X.isSmallestNormalized()) + return APFloat::getSmallest(Sem); + int Exp = ilogb(X); + if (X.getExactLog2() != INT_MIN) + Exp -= 1; + return scalbn(APFloat::getOne(Sem), Exp - (Sem.precision - 1), + APFloat::rmNearestTiesToEven); +} + namespace detail { /* Constructors. */ void IEEEFloat::initialize(const fltSemantics *ourSemantics) { @@ -5306,12 +5330,110 @@ Expected<APFloat::opStatus> DoubleAPFloat::convertFromString(StringRef S, return Ret; } +// The double-double lattice of values corresponds to numbers which obey: +// - abs(lo) <= 1/2 * ulp(hi) +// - roundTiesToEven(hi + lo) == hi +// +// nextUp must choose the smallest output > input that follows these rules. +// nexDown must choose the largest output < input that follows these rules. 
APFloat::opStatus DoubleAPFloat::next(bool nextDown) { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); - APFloat Tmp(semPPCDoubleDoubleLegacy, bitcastToAPInt()); - auto Ret = Tmp.next(nextDown); - *this = DoubleAPFloat(semPPCDoubleDouble, Tmp.bitcastToAPInt()); - return Ret; + // nextDown(x) = -nextUp(-x) + if (nextDown) { + changeSign(); + APFloat::opStatus Result = next(/*nextDown=*/false); + changeSign(); + return Result; + } + switch (getCategory()) { + case fcInfinity: + // nextUp(+inf) = +inf + // nextUp(-inf) = -getLargest() + if (isNegative()) + makeLargest(true); + return opOK; + + case fcNaN: + // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. + // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not + // change the payload. + if (getFirst().isSignaling()) { + // For consistency, propagate the sign of the sNaN to the qNaN. + makeNaN(false, isNegative(), nullptr); + return opInvalidOp; + } + return opOK; + + case fcZero: + // nextUp(pm 0) = +getSmallest() + makeSmallest(false); + return opOK; + + case fcNormal: + break; + } + + const APFloat &HiOld = getFirst(); + const APFloat &LoOld = getSecond(); + + APFloat NextLo = LoOld; + NextLo.next(/*nextDown=*/false); + + // We want to admit values where: + // 1. abs(Lo) <= ulp(Hi)/2 + // 2. Hi == RTNE(Hi + lo) + auto InLattice = [](const APFloat &Hi, const APFloat &Lo) { + return Hi + Lo == Hi; + }; + + // Check if (HiOld, nextUp(LoOld) is in the lattice. + if (InLattice(HiOld, NextLo)) { + // Yes, the result is (HiOld, nextUp(LoOld)). + Floats[1] = std::move(NextLo); + + // TODO: Because we currently rely on semPPCDoubleDoubleLegacy, our maximum + // value is defined to have exactly 106 bits of precision. This limitation + // results in semPPCDoubleDouble being unable to reach its maximum canonical + // value. 
+ DoubleAPFloat Largest{*Semantics, uninitialized}; + Largest.makeLargest(/*Neg=*/false); + if (compare(Largest) == cmpGreaterThan) + makeInf(/*Neg=*/false); + + return opOK; + } + + // Now we need to handle the cases where (HiOld, nextUp(LoOld)) is not the + // correct result. We know the new hi component will be nextUp(HiOld) but our + // lattice rules make it a little ambiguous what the correct NextLo must be. + APFloat NextHi = HiOld; + NextHi.next(/*nextDown=*/false); + + // nextUp(getLargest()) == INFINITY + if (NextHi.isInfinity()) { + makeInf(/*Neg=*/false); + return opOK; + } + + // IEEE 754-2019 5.3.1: + // "If x is the negative number of least magnitude in x's format, nextUp(x) is + // -0." + if (NextHi.isZero()) { + makeZero(/*Neg=*/true); + return opOK; + } + + // abs(NextLo) must be <= ulp(NextHi)/2. We want NextLo to be as close to + // negative infinity as possible. + NextLo = neg(scalbn(harrisonUlp(NextHi), -1, rmTowardZero)); + if (!InLattice(NextHi, NextLo)) + // RTNE may mean that Lo must be < ulp(NextHi) / 2 so we bump NextLo. 
+ NextLo.next(/*nextDown=*/false); + + Floats[0] = std::move(NextHi); + Floats[1] = std::move(NextLo); + + return opOK; } APFloat::opStatus diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp index 509cbb0..e8d3161 100644 --- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp @@ -813,8 +813,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { } } - if (!F.hasFnAttribute(Attribute::HybridPatchable) || F.isDeclaration() || - F.hasLocalLinkage() || + if (!F.hasFnAttribute(Attribute::HybridPatchable) || + F.isDeclarationForLinker() || F.hasLocalLinkage() || F.getName().ends_with(HybridPatchableTargetSuffix)) continue; @@ -857,7 +857,7 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { SetVector<GlobalValue *> DirectCalledFns; for (Function &F : Mod) - if (!F.isDeclaration() && + if (!F.isDeclarationForLinker() && F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native && F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) processFunction(F, DirectCalledFns, FnsMap); @@ -869,7 +869,8 @@ bool AArch64Arm64ECCallLowering::runOnModule(Module &Mod) { }; SmallVector<ThunkInfo> ThunkMapping; for (Function &F : Mod) { - if (!F.isDeclaration() && (!F.hasLocalLinkage() || F.hasAddressTaken()) && + if (!F.isDeclarationForLinker() && + (!F.hasLocalLinkage() || F.hasAddressTaken()) && F.getCallingConv() != CallingConv::ARM64EC_Thunk_Native && F.getCallingConv() != CallingConv::ARM64EC_Thunk_X64) { if (!F.hasComdat()) @@ -959,7 +960,7 @@ bool AArch64Arm64ECCallLowering::processFunction( // unprototyped functions in C) if (Function *F = CB->getCalledFunction()) { if (!LowerDirectToIndirect || F->hasLocalLinkage() || - F->isIntrinsic() || !F->isDeclaration()) + F->isIntrinsic() || !F->isDeclarationForLinker()) continue; DirectCalledFns.insert(F); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4f6e3dd..2b6ea86 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -162,10 +162,10 @@ static cl::opt<bool> UseFEATCPACodegen( cl::init(false)); /// Value type used for condition codes. -static const MVT MVT_CC = MVT::i32; +constexpr MVT CondCodeVT = MVT::i32; /// Value type used for NZCV flags. -static constexpr MVT FlagsVT = MVT::i32; +constexpr MVT FlagsVT = MVT::i32; static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, AArch64::X5, @@ -3472,6 +3472,12 @@ static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, } } +/// Like SelectionDAG::getCondCode(), but for AArch64 condition codes. +static SDValue getCondCode(SelectionDAG &DAG, AArch64CC::CondCode CC) { + // TODO: Should be TargetConstant (need to s/imm/timm in patterns). + return DAG.getConstant(CC, SDLoc(), CondCodeVT); +} + static bool isLegalArithImmed(uint64_t C) { // Matches AArch64DAGToDAGISel::SelectArithImmed(). 
bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0); @@ -3678,7 +3684,7 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, if (Opcode == 0) Opcode = AArch64ISD::CCMP; - SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); + SDValue Condition = getCondCode(DAG, Predicate); AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); @@ -4075,7 +4081,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, Cmp = emitComparison(LHS, RHS, CC, DL, DAG); AArch64CC = changeIntCCToAArch64CC(CC); } - AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC); + AArch64cc = getCondCode(DAG, AArch64CC); return Cmp; } @@ -4195,7 +4201,7 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const { AArch64CC::CondCode CC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, getInvertedCondCode(CC)); return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, Overflow); } @@ -4274,8 +4280,8 @@ static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG, SDLoc DL(Glue); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue One = DAG.getConstant(1, DL, VT); - unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS; - SDValue CC = DAG.getConstant(Cond, DL, MVT::i32); + AArch64CC::CondCode Cond = Invert ? 
AArch64CC::LO : AArch64CC::HS; + SDValue CC = getCondCode(DAG, Cond); return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue); } @@ -4285,7 +4291,7 @@ static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG) { SDLoc DL(Glue); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue One = DAG.getConstant(1, DL, VT); - SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32); + SDValue CC = getCondCode(DAG, AArch64CC::VS); return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue); } @@ -4334,7 +4340,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { // We use an inverted condition, because the conditional select is inverted // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). - SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, getInvertedCondCode(CC)); Overflow = DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow); @@ -7124,8 +7130,7 @@ SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, FlagsVT), Op.getOperand(0), DAG.getConstant(0, DL, VT)); return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg, - DAG.getConstant(AArch64CC::PL, DL, MVT::i32), - Cmp.getValue(1)); + getCondCode(DAG, AArch64CC::PL), Cmp.getValue(1)); } static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { @@ -7136,7 +7141,7 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) { AArch64CC::CondCode CC; if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) { SDLoc DL(Op); - SDValue CCVal = DAG.getConstant(CC, DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, CC); return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal, Cmp); } @@ -10575,7 +10580,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); - SDValue CCVal 
= DAG.getConstant(OFCC, DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, OFCC); return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal, Overflow); @@ -10648,7 +10653,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) && ProduceNonFlagSettingCondBr) { SDValue Cond = - DAG.getTargetConstant(changeIntCCToAArch64CC(CC), DL, MVT::i32); + DAG.getTargetConstant(changeIntCCToAArch64CC(CC), DL, CondCodeVT); return DAG.getNode(AArch64ISD::CB, DL, MVT::Other, Chain, Cond, LHS, RHS, Dest); } @@ -10667,11 +10672,11 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG); AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32); + SDValue CC1Val = getCondCode(DAG, CC1); SDValue BR1 = DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CC1Val, Cmp); if (CC2 != AArch64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32); + SDValue CC2Val = getCondCode(DAG, CC2); return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, BR1, Dest, CC2Val, Cmp); } @@ -11160,7 +11165,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1, CC2); - SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32); + SDValue CC1Val = getCondCode(DAG, CC1); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. This will allow the setcc to be @@ -11173,11 +11178,11 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // of the first as the RHS. We're effectively OR'ing the two CC's together. // FIXME: It would be nice if we could match the two CSELs to two CSINCs. 
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32); + SDValue CC1Val = getCondCode(DAG, CC1); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp); - SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32); + SDValue CC2Val = getCondCode(DAG, CC2); Res = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp); } return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, DL) : Res; @@ -11205,8 +11210,7 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op, ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(3))->get(); ISD::CondCode CondInv = ISD::getSetCCInverse(Cond, VT); - SDValue CCVal = - DAG.getConstant(changeIntCCToAArch64CC(CondInv), DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, changeIntCCToAArch64CC(CondInv)); // Inputs are swapped because the condition is inverted. This will allow // matching with a single CSINC instruction. return DAG.getNode(AArch64ISD::CSEL, DL, OpVT, FVal, TVal, CCVal, @@ -11360,18 +11364,6 @@ SDValue AArch64TargetLowering::LowerSELECT_CC( ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal); ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal); ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); - // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform - // into (OR (ASR lhs, N-1), 1), which requires less instructions for the - // supported types. - if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal && - CTVal->isOne() && CFVal->isAllOnes() && - LHS.getValueType() == TVal.getValueType()) { - EVT VT = LHS.getValueType(); - SDValue Shift = - DAG.getNode(ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getSizeInBits() - 1, DL, VT)); - return DAG.getNode(ISD::OR, DL, VT, Shift, DAG.getConstant(1, DL, VT)); - } // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns. // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1)) @@ -11577,13 +11569,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC( } // Emit first, and possibly only, CSEL. 
- SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32); + SDValue CC1Val = getCondCode(DAG, CC1); SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp); // If we need a second CSEL, emit it, using the output of the first as the // RHS. We're effectively OR'ing the two CC's together. if (CC2 != AArch64CC::AL) { - SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32); + SDValue CC2Val = getCondCode(DAG, CC2); return DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp); } @@ -11685,7 +11677,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, AArch64CC::CondCode OFCC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); - SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, OFCC); return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, Overflow); @@ -12525,10 +12517,10 @@ static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) { /// WZR, invert(<cond>)'. static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL, SelectionDAG &DAG) { - return DAG.getNode( - AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV); + return DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + getCondCode(DAG, getInvertedCondCode(CC)), NZCV); } // Lower @cc flag output via getSETCC. 
@@ -18699,7 +18691,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, Created.push_back(Cmp.getNode()); Created.push_back(And.getNode()); } else { - SDValue CCVal = DAG.getConstant(AArch64CC::MI, DL, MVT_CC); + SDValue CCVal = getCondCode(DAG, AArch64CC::MI); SDVTList VTs = DAG.getVTList(VT, FlagsVT); SDValue Negs = DAG.getNode(AArch64ISD::SUBS, DL, VTs, Zero, N0); @@ -19571,11 +19563,11 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) { if (N->getOpcode() == ISD::AND) { AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0); - Condition = DAG.getConstant(InvCC0, DL, MVT_CC); + Condition = getCondCode(DAG, InvCC0); NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1); } else { AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1); - Condition = DAG.getConstant(CC0, DL, MVT_CC); + Condition = getCondCode(DAG, CC0); NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1); } @@ -19596,8 +19588,7 @@ static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) { Cmp1.getOperand(1), NZCVOp, Condition, Cmp0); } return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0), - CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32), - CCmp); + CSel0.getOperand(1), getCondCode(DAG, CC1), CCmp); } static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, @@ -19802,7 +19793,7 @@ static SDValue performANDSETCCCombine(SDNode *N, SDLoc DL(N); return DAG.getNode(AArch64ISD::CSINC, DL, VT, DAG.getConstant(0, DL, VT), DAG.getConstant(0, DL, VT), - DAG.getConstant(InvertedCC, DL, MVT::i32), Cmp); + getCondCode(DAG, InvertedCC), Cmp); } } return SDValue(); @@ -20793,7 +20784,7 @@ static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) { "Unexpected constant value"); SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0)); - SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32); + SDValue CCVal = getCondCode(DAG, AArch64CC); SDValue Cmp = LHS.getOperand(3); return 
DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp); @@ -20979,7 +20970,7 @@ static SDValue foldADCToCINC(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); // (CINC x cc cond) <=> (CSINC x x !cc cond) - SDValue CC = DAG.getConstant(AArch64CC::LO, DL, MVT::i32); + SDValue CC = getCondCode(DAG, AArch64CC::LO); return DAG.getNode(AArch64ISD::CSINC, DL, VT, LHS, LHS, CC, Cond); } @@ -22052,7 +22043,7 @@ static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, // Convert CC to integer based on requested condition. // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare. - SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32); + SDValue CC = getCondCode(DAG, getInvertedCondCode(Cond)); SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test); return DAG.getZExtOrTrunc(Res, DL, VT); } @@ -25093,10 +25084,9 @@ static SDValue performBRCONDCombine(SDNode *N, auto CSelCC = getCSETCondCode(CSel); if (CSelCC) { SDLoc DL(N); - return DAG.getNode( - N->getOpcode(), DL, N->getVTList(), Chain, Dest, - DAG.getConstant(getInvertedCondCode(*CSelCC), DL, MVT::i32), - CSel.getOperand(3)); + return DAG.getNode(N->getOpcode(), DL, N->getVTList(), Chain, Dest, + getCondCode(DAG, getInvertedCondCode(*CSelCC)), + CSel.getOperand(3)); } } @@ -25237,7 +25227,7 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) { SDLoc DL(Op); EVT VT = Op->getValueType(0); - SDValue CCValue = DAG.getConstant(CC, DL, MVT::i32); + SDValue CCValue = getCondCode(DAG, CC); return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond); } @@ -25314,8 +25304,7 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) { SDValue TValReassoc = Reassociate(TReassocOp, 0); SDValue FValReassoc = Reassociate(FReassocOp, 1); return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc, - DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC), - NewCmp.getValue(1)); + getCondCode(DAG, NewCC), 
NewCmp.getValue(1)); }; auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2)); @@ -25456,8 +25445,7 @@ static SDValue performCSELCombine(SDNode *N, SDValue Sub = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), Cond.getOperand(1), Cond.getOperand(0)); return DAG.getNode(AArch64ISD::CSEL, DL, N->getVTList(), N->getOperand(0), - N->getOperand(1), - DAG.getConstant(NewCond, DL, MVT::i32), + N->getOperand(1), getCondCode(DAG, NewCond), Sub.getValue(1)); } } @@ -25557,10 +25545,9 @@ static SDValue performSETCCCombine(SDNode *N, auto NewCond = getInvertedCondCode(OldCond); // csel 0, 1, !cond, X - SDValue CSEL = - DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0), - LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32), - LHS.getOperand(3)); + SDValue CSEL = DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), + LHS.getOperand(0), LHS.getOperand(1), + getCondCode(DAG, NewCond), LHS.getOperand(3)); return DAG.getZExtOrTrunc(CSEL, DL, VT); } @@ -25630,8 +25617,7 @@ static SDValue performFlagSettingCombine(SDNode *N, // If the flag result isn't used, convert back to a generic opcode. if (!N->hasAnyUseOfValue(1)) { SDValue Res = DCI.DAG.getNode(GenericOpcode, DL, VT, N->ops()); - return DCI.DAG.getMergeValues({Res, DCI.DAG.getConstant(0, DL, MVT::i32)}, - DL); + return DCI.CombineTo(N, Res, SDValue(N, 1)); } // Combine identical generic nodes into this node, re-using the result. 
@@ -27013,10 +26999,10 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) { SDValue A = DAG.getNode( AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, FlagsVT, MVT::Other), N->getOperand(0), DAG.getConstant(Register, DL, MVT::i32)); - SDValue B = DAG.getNode( - AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1)); + SDValue B = DAG.getNode(AArch64ISD::CSINC, DL, MVT::i32, + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + getCondCode(DAG, AArch64CC::NE), A.getValue(1)); return DAG.getMergeValues( {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 251fd44..ac31236 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -448,8 +448,13 @@ def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3, SDTCisVT<1, FlagsVT>, SDTCisVT<4, FlagsVT>]>; +// Value type used for condition codes. +// Should be kept in sync with its C++ counterpart. 
+defvar CondCodeVT = i32; + def SDT_AArch64Brcond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, + [SDTCisVT<0, OtherVT>, + SDTCisVT<1, CondCodeVT>, SDTCisVT<2, FlagsVT>]>; def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>; def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, @@ -458,22 +463,22 @@ def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, def SDT_AArch64CSel : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisInt<3>, + SDTCisVT<3, CondCodeVT>, SDTCisVT<4, FlagsVT>]>; def SDT_AArch64CCMP : SDTypeProfile<1, 5, [SDTCisVT<0, FlagsVT>, SDTCisInt<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; + SDTCisVT<4, CondCodeVT>, + SDTCisVT<5, FlagsVT>]>; def SDT_AArch64FCCMP : SDTypeProfile<1, 5, [SDTCisVT<0, FlagsVT>, SDTCisFP<1>, SDTCisSameAs<1, 2>, SDTCisInt<3>, - SDTCisInt<4>, - SDTCisVT<5, i32>]>; + SDTCisVT<4, CondCodeVT>, + SDTCisVT<5, FlagsVT>]>; def SDT_AArch64FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, FlagsVT>, SDTCisFP<1>, SDTCisSameAs<2, 1>]>; @@ -546,7 +551,8 @@ def SDT_AArch64TBL : SDTypeProfile<1, 2, [ ]>; def SDT_AArch64cb : SDTypeProfile<0, 4, - [SDTCisVT<0, i32>, SDTCisInt<1>, SDTCisInt<2>, + [SDTCisVT<0, CondCodeVT>, + SDTCisInt<1>, SDTCisInt<2>, SDTCisVT<3, OtherVT>]>; // non-extending masked load fragment. diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 18ca22f..e1adc0b 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -270,6 +270,13 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { SMECallAttrs CallAttrs(*Caller, *Callee); + // Never inline a function explicitly marked as being streaming, + // into a non-streaming function. Assume it was marked as streaming + // for a reason. 
+ if (CallAttrs.caller().hasNonStreamingInterfaceAndBody() && + CallAttrs.callee().hasStreamingInterfaceOrBody()) + return false; + // When inlining, we should consider the body of the function, not the // interface. if (CallAttrs.callee().hasStreamingBody()) { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index b9d3e1b..6912caf 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -461,7 +461,7 @@ void AArch64AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // Used to point to big endian bytes. unsigned FulleSizeInBytes = getFixupKindContainereSizeInBytes(Fixup.getKind()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index c8e45d4..0894e26 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3204,6 +3204,18 @@ void AMDGPURegisterBankInfo::applyMappingImpl( constrainOpWithReadfirstlane(B, MI, 5); return; } + case Intrinsic::amdgcn_permlane_bcast: + case Intrinsic::amdgcn_permlane_up: + case Intrinsic::amdgcn_permlane_down: + case Intrinsic::amdgcn_permlane_xor: + // Doing a waterfall loop over these wouldn't make any sense. 
+ constrainOpWithReadfirstlane(B, MI, 3); + constrainOpWithReadfirstlane(B, MI, 4); + return; + case Intrinsic::amdgcn_permlane_idx_gen: { + constrainOpWithReadfirstlane(B, MI, 3); + return; + } case Intrinsic::amdgcn_sbfe: applyMappingBFE(B, OpdMapper, true); return; @@ -4591,6 +4603,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp8: case Intrinsic::amdgcn_cvt_scale_pk8_f32_bf8: case Intrinsic::amdgcn_cvt_scale_pk8_f32_fp4: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_bf16: + case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_bf16: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f16: + case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f16: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp8_f32: + case Intrinsic::amdgcn_cvt_scalef32_pk8_bf8_f32: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f32: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_f16: + case Intrinsic::amdgcn_cvt_scalef32_pk8_fp4_bf16: case Intrinsic::amdgcn_sat_pk4_i4_i8: case Intrinsic::amdgcn_sat_pk4_u4_u8: case Intrinsic::amdgcn_fmed3: @@ -4902,6 +4923,24 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[5] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI); break; } + case Intrinsic::amdgcn_permlane_bcast: + case Intrinsic::amdgcn_permlane_up: + case Intrinsic::amdgcn_permlane_down: + case Intrinsic::amdgcn_permlane_xor: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + OpdsMapping[3] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + OpdsMapping[4] = getSGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI); + break; + } + case Intrinsic::amdgcn_permlane_idx_gen: { + unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + OpdsMapping[2] = 
AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + OpdsMapping[3] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI); + break; + } case Intrinsic::amdgcn_permlane16_var: case Intrinsic::amdgcn_permlanex16_var: { unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td index dfe0cbf..10b8606 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td @@ -321,6 +321,11 @@ def : SourceOfDivergence<int_amdgcn_permlane16>; def : SourceOfDivergence<int_amdgcn_permlanex16>; def : SourceOfDivergence<int_amdgcn_permlane16_var>; def : SourceOfDivergence<int_amdgcn_permlanex16_var>; +def : SourceOfDivergence<int_amdgcn_permlane_bcast>; +def : SourceOfDivergence<int_amdgcn_permlane_up>; +def : SourceOfDivergence<int_amdgcn_permlane_down>; +def : SourceOfDivergence<int_amdgcn_permlane_xor>; +def : SourceOfDivergence<int_amdgcn_permlane_idx_gen>; def : SourceOfDivergence<int_amdgcn_mov_dpp>; def : SourceOfDivergence<int_amdgcn_mov_dpp8>; def : SourceOfDivergence<int_amdgcn_update_dpp>; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 94886b0..96cb5ae 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -152,7 +152,12 @@ static bool isPermlane(const MachineInstr &MI) { Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e32 || Opcode == AMDGPU::V_PERMLANE16_SWAP_B32_e64 || Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e32 || - Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64; + Opcode == AMDGPU::V_PERMLANE32_SWAP_B32_e64 || + Opcode == AMDGPU::V_PERMLANE_BCAST_B32_e64 || + Opcode == AMDGPU::V_PERMLANE_UP_B32_e64 || + Opcode == AMDGPU::V_PERMLANE_DOWN_B32_e64 || + Opcode == AMDGPU::V_PERMLANE_XOR_B32_e64 || + Opcode == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64; } static bool isLdsDma(const 
MachineInstr &MI) { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 33b66a6..96d5668 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -592,10 +592,13 @@ bool GCNMaxILPSchedStrategy::tryCandidate(SchedCandidate &Cand, // This is a best effort to set things up for a post-RA pass. Optimizations // like generating loads of multiple registers should ideally be done within // the scheduler pass by combining the loads during DAG postprocessing. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -666,10 +669,13 @@ bool GCNMaxMemoryClauseSchedStrategy::tryCandidate(SchedCandidate &Cand, // MaxMemoryClause-specific: We prioritize clustered instructions as we would // get more benefit from clausing these memory instructions. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? 
TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -936,11 +942,9 @@ void GCNScheduleDAGMILive::finalizeSchedule() { Pressure.resize(Regions.size()); RegionsWithHighRP.resize(Regions.size()); RegionsWithExcessRP.resize(Regions.size()); - RegionsWithMinOcc.resize(Regions.size()); RegionsWithIGLPInstrs.resize(Regions.size()); RegionsWithHighRP.reset(); RegionsWithExcessRP.reset(); - RegionsWithMinOcc.reset(); RegionsWithIGLPInstrs.reset(); runSchedStages(); @@ -1090,8 +1094,7 @@ bool PreRARematStage::initGCNSchedStage() { // fixed if there is another pass after this pass. assert(!S.hasNextStage()); - if (!GCNSchedStage::initGCNSchedStage() || DAG.RegionsWithMinOcc.none() || - DAG.Regions.size() == 1) + if (!GCNSchedStage::initGCNSchedStage() || DAG.Regions.size() == 1) return false; // Before performing any IR modification record the parent region of each MI @@ -1133,11 +1136,6 @@ void UnclusteredHighRPStage::finalizeGCNSchedStage() { SavedMutations.swap(DAG.Mutations); S.SGPRLimitBias = S.VGPRLimitBias = 0; if (DAG.MinOccupancy > InitialOccupancy) { - for (unsigned IDX = 0; IDX < DAG.Pressure.size(); ++IDX) - DAG.RegionsWithMinOcc[IDX] = - DAG.Pressure[IDX].getOccupancy( - DAG.ST, DAG.MFI.getDynamicVGPRBlockSize()) == DAG.MinOccupancy; - LLVM_DEBUG(dbgs() << StageID << " stage successfully increased occupancy to " << DAG.MinOccupancy << '\n'); @@ -1209,11 +1207,15 @@ bool GCNSchedStage::initGCNRegion() { } bool UnclusteredHighRPStage::initGCNRegion() { - // Only reschedule regions with the minimum occupancy or regions that may have - // spilling (excess register pressure). 
- if ((!DAG.RegionsWithMinOcc[RegionIdx] || - DAG.MinOccupancy <= InitialOccupancy) && - !DAG.RegionsWithExcessRP[RegionIdx]) + // Only reschedule regions that have excess register pressure (i.e. spilling) + // or had minimum occupancy at the beginning of the stage (as long as + // rescheduling of previous regions did not make occupancy drop back down to + // the initial minimum). + unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize(); + if (!DAG.RegionsWithExcessRP[RegionIdx] && + (DAG.MinOccupancy <= InitialOccupancy || + DAG.Pressure[RegionIdx].getOccupancy(ST, DynamicVGPRBlockSize) != + InitialOccupancy)) return false; return GCNSchedStage::initGCNRegion(); @@ -1278,9 +1280,6 @@ void GCNSchedStage::checkScheduling() { if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit && PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) <= S.VGPRCriticalLimit) { DAG.Pressure[RegionIdx] = PressureAfter; - DAG.RegionsWithMinOcc[RegionIdx] = - PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) == - DAG.MinOccupancy; // Early out if we have achieved the occupancy target. LLVM_DEBUG(dbgs() << "Pressure in desired limits, done.\n"); @@ -1314,7 +1313,6 @@ void GCNSchedStage::checkScheduling() { if (NewOccupancy < DAG.MinOccupancy) { DAG.MinOccupancy = NewOccupancy; MFI.limitOccupancy(DAG.MinOccupancy); - DAG.RegionsWithMinOcc.reset(); LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to " << DAG.MinOccupancy << ".\n"); } @@ -1336,14 +1334,10 @@ void GCNSchedStage::checkScheduling() { // Revert if this region's schedule would cause a drop in occupancy or // spilling. 
- if (shouldRevertScheduling(WavesAfter)) { + if (shouldRevertScheduling(WavesAfter)) revertScheduling(); - } else { + else DAG.Pressure[RegionIdx] = PressureAfter; - DAG.RegionsWithMinOcc[RegionIdx] = - PressureAfter.getOccupancy(ST, DynamicVGPRBlockSize) == - DAG.MinOccupancy; - } } unsigned @@ -1573,9 +1567,6 @@ bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) { } void GCNSchedStage::revertScheduling() { - DAG.RegionsWithMinOcc[RegionIdx] = - PressureBefore.getOccupancy(ST, DAG.MFI.getDynamicVGPRBlockSize()) == - DAG.MinOccupancy; LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); DAG.RegionEnd = DAG.RegionBegin; int SkippedDebugInstr = 0; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 94cd795..32139a9 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -250,9 +250,6 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive { // limit. Register pressure in these regions usually will result in spilling. BitVector RegionsWithExcessRP; - // Regions that has the same occupancy as the latest MinOccupancy - BitVector RegionsWithMinOcc; - // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT). 
BitVector RegionsWithIGLPInstrs; diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 0a0a107..0237a60 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -340,6 +340,43 @@ void GCNSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, Policy.ShouldTrackLaneMasks = true; } +void GCNSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy, + const SchedRegion &Region) const { + const Function &F = Region.RegionBegin->getMF()->getFunction(); + Attribute PostRADirectionAttr = F.getFnAttribute("amdgpu-post-ra-direction"); + if (!PostRADirectionAttr.isValid()) + return; + + StringRef PostRADirectionStr = PostRADirectionAttr.getValueAsString(); + if (PostRADirectionStr == "topdown") { + Policy.OnlyTopDown = true; + Policy.OnlyBottomUp = false; + } else if (PostRADirectionStr == "bottomup") { + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = true; + } else if (PostRADirectionStr == "bidirectional") { + Policy.OnlyTopDown = false; + Policy.OnlyBottomUp = false; + } else { + DiagnosticInfoOptimizationFailure Diag( + F, F.getSubprogram(), "invalid value for postRA direction attribute"); + F.getContext().diagnose(Diag); + } + + LLVM_DEBUG({ + const char *DirStr = "default"; + if (Policy.OnlyTopDown && !Policy.OnlyBottomUp) + DirStr = "topdown"; + else if (!Policy.OnlyTopDown && Policy.OnlyBottomUp) + DirStr = "bottomup"; + else if (!Policy.OnlyTopDown && !Policy.OnlyBottomUp) + DirStr = "bidirectional"; + + dbgs() << "Post-MI-sched direction (" << F.getName() << "): " << DirStr + << '\n'; + }); +} + void GCNSubtarget::mirFileLoaded(MachineFunction &MF) const { if (isWave32()) { // Fix implicit $vcc operands after MIParser has verified that they match diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index bdd900d..6fe3abc 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ 
-1041,6 +1041,9 @@ public: void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override; + void overridePostRASchedPolicy(MachineSchedPolicy &Policy, + const SchedRegion &Region) const override; + void mirFileLoaded(MachineFunction &MF) const override; unsigned getMaxNumUserSGPRs() const { diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 2a920f6..86d56855 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -149,7 +149,7 @@ void AMDGPUAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); uint32_t Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the bits from // the fixup value. diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 11552b3..9b348d4 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -983,6 +983,7 @@ void SIFrameLowering::emitCSRSpillStores( const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); const SIInstrInfo *TII = ST.getInstrInfo(); const SIRegisterInfo &TRI = TII->getRegisterInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch // registers. However, save all lanes of callee-saved VGPRs. 
Due to this, we @@ -1005,6 +1006,12 @@ void SIFrameLowering::emitCSRSpillStores( } }; + for (const Register Reg : make_first_range(WWMScratchRegs)) { + if (!MRI.isReserved(Reg)) { + MRI.addLiveIn(Reg); + MBB.addLiveIn(Reg); + } + } StoreWWMRegisters(WWMScratchRegs); auto EnableAllLanes = [&]() { diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ad26757..4d67e4a 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -16825,56 +16825,51 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_, return std::pair(0U, RC); } - if (Constraint.starts_with("{") && Constraint.ends_with("}")) { - StringRef RegName(Constraint.data() + 1, Constraint.size() - 2); - if (RegName.consume_front("v")) { + auto [Kind, Idx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Constraint); + if (Kind != '\0') { + if (Kind == 'v') { RC = &AMDGPU::VGPR_32RegClass; - } else if (RegName.consume_front("s")) { + } else if (Kind == 's') { RC = &AMDGPU::SGPR_32RegClass; - } else if (RegName.consume_front("a")) { + } else if (Kind == 'a') { RC = &AMDGPU::AGPR_32RegClass; } if (RC) { - uint32_t Idx; - if (RegName.consume_front("[")) { - uint32_t End; - bool Failed = RegName.consumeInteger(10, Idx); - Failed |= !RegName.consume_front(":"); - Failed |= RegName.consumeInteger(10, End); - Failed |= !RegName.consume_back("]"); - if (!Failed) { - uint32_t Width = (End - Idx + 1) * 32; - // Prohibit constraints for register ranges with a width that does not - // match the required type. - if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits()) + if (NumRegs > 1) { + if (Idx >= RC->getNumRegs() || Idx + NumRegs - 1 > RC->getNumRegs()) + return std::pair(0U, nullptr); + + uint32_t Width = NumRegs * 32; + // Prohibit constraints for register ranges with a width that does not + // match the required type. 
+ if (VT.SimpleTy != MVT::Other && Width != VT.getSizeInBits()) + return std::pair(0U, nullptr); + + MCRegister Reg = RC->getRegister(Idx); + if (SIRegisterInfo::isVGPRClass(RC)) + RC = TRI->getVGPRClassForBitWidth(Width); + else if (SIRegisterInfo::isSGPRClass(RC)) + RC = TRI->getSGPRClassForBitWidth(Width); + else if (SIRegisterInfo::isAGPRClass(RC)) + RC = TRI->getAGPRClassForBitWidth(Width); + if (RC) { + Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC); + if (!Reg) { + // The register class does not contain the requested register, + // e.g., because it is an SGPR pair that would violate alignment + // requirements. return std::pair(0U, nullptr); - MCRegister Reg = RC->getRegister(Idx); - if (SIRegisterInfo::isVGPRClass(RC)) - RC = TRI->getVGPRClassForBitWidth(Width); - else if (SIRegisterInfo::isSGPRClass(RC)) - RC = TRI->getSGPRClassForBitWidth(Width); - else if (SIRegisterInfo::isAGPRClass(RC)) - RC = TRI->getAGPRClassForBitWidth(Width); - if (RC) { - Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, RC); - if (!Reg) { - // The register class does not contain the requested register, - // e.g., because it is an SGPR pair that would violate alignment - // requirements. - return std::pair(0U, nullptr); - } - return std::pair(Reg, RC); } + return std::pair(Reg, RC); } - } else { - // Check for lossy scalar/vector conversions. - if (VT.isVector() && VT.getSizeInBits() != 32) - return std::pair(0U, nullptr); - bool Failed = RegName.getAsInteger(10, Idx); - if (!Failed && Idx < RC->getNumRegs()) - return std::pair(RC->getRegister(Idx), RC); } + + // Check for lossy scalar/vector conversions. 
+ if (VT.isVector() && VT.getSizeInBits() != 32) + return std::pair(0U, nullptr); + if (Idx < RC->getNumRegs()) + return std::pair(RC->getRegister(Idx), RC); } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 044a681..3f61bbd 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -6304,10 +6304,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, }; if (Opc == AMDGPU::V_PERMLANE16_B32_e64 || - Opc == AMDGPU::V_PERMLANEX16_B32_e64) { + Opc == AMDGPU::V_PERMLANEX16_B32_e64 || + Opc == AMDGPU::V_PERMLANE_BCAST_B32_e64 || + Opc == AMDGPU::V_PERMLANE_UP_B32_e64 || + Opc == AMDGPU::V_PERMLANE_DOWN_B32_e64 || + Opc == AMDGPU::V_PERMLANE_XOR_B32_e64 || + Opc == AMDGPU::V_PERMLANE_IDX_GEN_B32_e64) { // src1 and src2 must be scalar MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); - MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); const DebugLoc &DL = MI.getDebugLoc(); if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); @@ -6315,11 +6319,14 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, .add(Src1); Src1.ChangeToRegister(Reg, false); } - if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { - Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); - BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) - .add(Src2); - Src2.ChangeToRegister(Reg, false); + if (VOP3Idx[2] != -1) { + MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); + if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { + Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src2); + Src2.ChangeToRegister(Reg, false); + } } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 
a3e20ba..38b609c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2955,6 +2955,12 @@ def VOP_V8BF16_I32_I32 : VOPProfile<[v8bf16, i32, i32, untyped]>; def VOP_V16F32_V3I32_I32 : VOPProfile<[v16f32, v3i32, i32, untyped]>; def VOP_V8F32_V2I32_I32 : VOPProfile<[v8f32, v2i32, i32, untyped]>; def VOP_V8F32_I32_I32 : VOPProfile<[v8f32, i32, i32, untyped]>; +def VOP_V2I32_V8BF16_F32 : VOPProfile<[v2i32, v8bf16, f32, untyped]>; +def VOP_V2I32_V8F16_F32 : VOPProfile<[v2i32, v8f16, f32, untyped]>; +def VOP_V2I32_V8F32_F32 : VOPProfile<[v2i32, v8f32, f32, untyped]>; +def VOP_I32_V8F32_F32 : VOPProfile<[i32, v8f32, f32, untyped]>; +def VOP_I32_V8F16_F32 : VOPProfile<[i32, v8f16, f32, untyped]>; +def VOP_I32_V8BF16_F32 : VOPProfile<[i32, v8bf16, f32, untyped]>; def VOP_I32_F32_I32_F32 : VOPProfile<[i32, f32, i32, f32]>; def VOP_V6I32_V32BF16_I32_F32 : VOPProfile<[v6i32, v32bf16, i32, f32]>; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5827f18..65fa088 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1548,6 +1548,42 @@ bool shouldEmitConstantsToTextSection(const Triple &TT) { return TT.getArch() == Triple::r600; } +static bool isValidRegPrefix(char C) { + return C == 'v' || C == 's' || C == 'a'; +} + +std::tuple<char, unsigned, unsigned> +parseAsmConstraintPhysReg(StringRef Constraint) { + StringRef RegName = Constraint; + if (!RegName.consume_front("{") || !RegName.consume_back("}")) + return {}; + + char Kind = RegName.front(); + if (!isValidRegPrefix(Kind)) + return {}; + + RegName = RegName.drop_front(); + if (RegName.consume_front("[")) { + unsigned Idx, End; + bool Failed = RegName.consumeInteger(10, Idx); + Failed |= !RegName.consume_front(":"); + Failed |= RegName.consumeInteger(10, End); + Failed |= !RegName.consume_back("]"); + if (!Failed) { + unsigned NumRegs = End - Idx + 1; + 
if (NumRegs > 1) + return {Kind, Idx, NumRegs}; + } + } else { + unsigned Idx; + bool Failed = RegName.getAsInteger(10, Idx); + if (!Failed) + return {Kind, Idx, 1}; + } + + return {}; +} + std::pair<unsigned, unsigned> getIntegerPairAttribute(const Function &F, StringRef Name, std::pair<unsigned, unsigned> Default, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 74d59f4..1252e35 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1012,6 +1012,12 @@ bool isReadOnlySegment(const GlobalValue *GV); /// target triple \p TT, false otherwise. bool shouldEmitConstantsToTextSection(const Triple &TT); +/// Returns a valid charcode or 0 in the first entry if this is a valid physical +/// register constraint. Followed by the start register number, and the register +/// width. Does not validate the number of registers exists in the class. +std::tuple<char, unsigned, unsigned> +parseAsmConstraintPhysReg(StringRef Constraint); + /// \returns Integer value requested using \p F's \p Name attribute. /// /// \returns \p Default if attribute is not present. diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 1ffe39d..f1ed9380 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -1053,6 +1053,14 @@ def VOP3_PERMLANE_VAR_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, untyped let HasExtDPP = 0; } +class VOP3_PERMLANE_NOOPSEL_Profile<VOPProfile P> : VOP3_Profile<P> { + let Ins64 = !con((ins VRegSrc_32:$src0, SSrc_b32:$src1), + !if(P.HasSrc2, (ins SSrc_b32:$src2), (ins))); + let HasClamp = 0; + let HasExtVOP3DPP = 0; + let HasExtDPP = 0; +} + def opsel_i1timm : SDNodeXForm<timm, [{ return CurDAG->getTargetConstant( N->getZExtValue() ? 
SISrcMods::OP_SEL_0 : SISrcMods::NONE, @@ -1136,6 +1144,18 @@ class PermlaneVarPat<SDPatternOperator permlane, VGPR_32:$src1, VGPR_32:$vdst_in) >; +class PermlaneNoDppPat3Src<SDPatternOperator permlane, + Instruction inst> : GCNPat< + (permlane i32:$src0, i32:$src1, i32:$src2), + (inst VGPR_32:$src0, SCSrc_b32:$src1, SCSrc_b32:$src2) +>; + +class PermlaneNoDppPat2Src<SDPatternOperator permlane, + Instruction inst> : GCNPat< + (permlane i32:$src0, i32:$src1), + (inst VGPR_32:$src0, SCSrc_b32:$src1) +>; + class VOP3_BITOP3_Profile<VOPProfile pfl, VOP3Features f> : VOP3_Profile<pfl, f> { let HasClamp = 0; let HasOMod = 0; @@ -1522,6 +1542,20 @@ let SubtargetPredicate = isGFX12Plus in { } // End SubtargetPredicate = isGFX12Plus +let SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32 in { + defm V_PERMLANE_BCAST_B32 : VOP3Inst<"v_permlane_bcast_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>; + defm V_PERMLANE_UP_B32 : VOP3Inst<"v_permlane_up_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>; + defm V_PERMLANE_DOWN_B32 : VOP3Inst<"v_permlane_down_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>; + defm V_PERMLANE_XOR_B32 : VOP3Inst<"v_permlane_xor_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32_I32>>; + defm V_PERMLANE_IDX_GEN_B32 : VOP3Inst<"v_permlane_idx_gen_b32", VOP3_PERMLANE_NOOPSEL_Profile<VOP_I32_I32_I32>>; + + def : PermlaneNoDppPat3Src<int_amdgcn_permlane_bcast, V_PERMLANE_BCAST_B32_e64>; + def : PermlaneNoDppPat3Src<int_amdgcn_permlane_up, V_PERMLANE_UP_B32_e64>; + def : PermlaneNoDppPat3Src<int_amdgcn_permlane_down, V_PERMLANE_DOWN_B32_e64>; + def : PermlaneNoDppPat3Src<int_amdgcn_permlane_xor, V_PERMLANE_XOR_B32_e64>; + def : PermlaneNoDppPat2Src<int_amdgcn_permlane_idx_gen, V_PERMLANE_IDX_GEN_B32_e64>; +} // End SubtargetPredicate = isGFX1250Plus, WaveSizePredicate = isWave32 + let HasClamp = 0, HasModifiers = 1 in { def BitOp3_B16_Profile : VOP3_BITOP3_Profile<VOPProfile <[i16, i16, i16, i16, i32]>, 
VOP3_OPSEL>; def BitOp3_B16_t16_Profile : VOP3_Profile_True16<BitOp3_B16_Profile>; @@ -1744,6 +1778,20 @@ let SubtargetPredicate = isGFX1250Plus in { defm V_CVT_SCALE_PK8_F32_FP4 : VOP3CvtScaleSelInst<"v_cvt_scale_pk8_f32_fp4", VOP_V8F32_I32_I32, int_amdgcn_cvt_scale_pk8_f32_fp4>; } // End ReadsModeReg = 0 + let Constraints = "@earlyclobber $vdst" in { + let WaveSizePredicate = isWave32 in { + defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_bf16>; + defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_bf16>; + defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f16>; + defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f16>; + defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp8_f32>; + defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Inst<"v_cvt_scalef32_pk8_bf8_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_V2I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_bf8_f32>; + defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f32", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F32_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f32>; + defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_f16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8F16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_f16>; + defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Inst<"v_cvt_scalef32_pk8_fp4_bf16", VOP3_CVT_SCALEF32_PK_F864_Profile<VOP_I32_V8BF16_F32>, int_amdgcn_cvt_scalef32_pk8_fp4_bf16>; + } // End WaveSizePredicate = isWave32 + } // End Constraints = 
"@earlyclobber $vdst" + let True16Predicate = UseRealTrue16Insts in { def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f16, V_CVT_SR_FP8_F16_t16_e64, f16>; def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f16, V_CVT_SR_BF8_F16_t16_e64, f16>; @@ -1973,6 +2021,11 @@ defm V_ADD_MAX_I32 : VOP3Only_Realtriple_gfx1250<0x25e>; defm V_ADD_MAX_U32 : VOP3Only_Realtriple_gfx1250<0x25f>; defm V_ADD_MIN_I32 : VOP3Only_Realtriple_gfx1250<0x260>; defm V_ADD_MIN_U32 : VOP3Only_Realtriple_gfx1250<0x261>; +defm V_PERMLANE_BCAST_B32 : VOP3Only_Real_Base_gfx12<0x270>; +defm V_PERMLANE_UP_B32 : VOP3Only_Real_Base_gfx12<0x271>; +defm V_PERMLANE_DOWN_B32 : VOP3Only_Real_Base_gfx12<0x272>; +defm V_PERMLANE_XOR_B32 : VOP3Only_Real_Base_gfx12<0x273>; +defm V_PERMLANE_IDX_GEN_B32 : VOP3Only_Real_Base_gfx12<0x314>; //===----------------------------------------------------------------------===// // GFX11, GFX12 @@ -2159,6 +2212,15 @@ defm V_CVT_SCALE_PK8_F32_FP8 : VOP3Only_ScaleSel_Real_gfx1250<0x2aa>; defm V_CVT_SCALE_PK8_F16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ab>; defm V_CVT_SCALE_PK8_BF16_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ac>; defm V_CVT_SCALE_PK8_F32_BF8 : VOP3Only_ScaleSel_Real_gfx1250<0x2ad>; +defm V_CVT_SCALEF32_PK8_FP4_F32 : VOP3Only_Real_Base_gfx1250<0x2b0>; +defm V_CVT_SCALEF32_PK8_FP4_F16 : VOP3Only_Real_Base_gfx1250<0x2b3>; +defm V_CVT_SCALEF32_PK8_FP8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b4>; +defm V_CVT_SCALEF32_PK8_BF8_BF16 : VOP3Only_Real_Base_gfx1250<0x2b5>; +defm V_CVT_SCALEF32_PK8_FP4_BF16 : VOP3Only_Real_Base_gfx1250<0x2b8>; +defm V_CVT_SCALEF32_PK8_FP8_F32 : VOP3Only_Real_Base_gfx1250<0x2c3>; +defm V_CVT_SCALEF32_PK8_FP8_F16 : VOP3Only_Real_Base_gfx1250<0x2c4>; +defm V_CVT_SCALEF32_PK8_BF8_F32 : VOP3Only_Real_Base_gfx1250<0x2c5>; +defm V_CVT_SCALEF32_PK8_BF8_F16 : VOP3Only_Real_Base_gfx1250<0x2c6>; defm V_CVT_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36d>; defm V_CVT_SR_PK_BF16_F32 : VOP3Only_Realtriple_gfx1250<0x36e>; defm V_CVT_PK_F16_F32 : 
VOP3Only_Realtriple_gfx1250<0x36f>; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index bd4b75f..9366256 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -5521,18 +5521,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal); ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS); if (Op.getValueType().isInteger()) { - // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform - // into (OR (ASR lhs, N-1), 1), which requires less instructions for the - // supported types. - if (CC == ISD::SETGT && RHSC && RHSC->isAllOnes() && CTVal && CFVal && - CTVal->isOne() && CFVal->isAllOnes() && - LHS.getValueType() == TrueVal.getValueType()) { - EVT VT = LHS.getValueType(); - SDValue Shift = - DAG.getNode(ISD::SRA, dl, VT, LHS, - DAG.getConstant(VT.getSizeInBits() - 1, dl, VT)); - return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT)); - } // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns. // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1)) diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 146fc67..dfa3de3c 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -1125,7 +1125,7 @@ void ARMAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, const unsigned NumBytes = getFixupKindNumBytes(Kind); unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // Used to point to big endian bytes. 
unsigned FullSizeBytes; diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp index 128cc0b..38444f9 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp @@ -398,7 +398,7 @@ void AVRAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp index 694d9ea..1bd82fad 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp @@ -220,7 +220,7 @@ void CSKYAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned Offset = Fixup.getOffset(); unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 7d3074b..d5b7a75 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -669,7 +669,7 @@ void HexagonAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, // to a real offset before we can use it. 
uint32_t Offset = Fixup.getOffset(); unsigned NumBytes = getFixupKindNumBytes(Kind); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); char *InstAddr = Data.data() + Offset; Value = adjustFixupValue(Kind, FixupValue); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp index d9ea88c..858f3d0 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp @@ -169,7 +169,7 @@ void LoongArchAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned Offset = Fixup.getOffset(); unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. for (unsigned I = 0; I != NumBytes; ++I) { diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp index 5e03903..7ef705d 100644 --- a/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp +++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kAsmBackend.cpp @@ -85,7 +85,7 @@ void M68kAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, Asm->getWriter().recordRelocation(F, Fixup, Target, Value); unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind()); - assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); + assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!"); // Check that uppper bits are either all zeros or all ones. // Specifically ignore overflow/underflow as long as the leakage is // limited to the lower bits. 
This is to remain compatible with diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp index 29e5bfa..b513503 100644 --- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp +++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp @@ -120,7 +120,7 @@ void MSP430AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned Offset = Fixup.getOffset(); unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 65d1be3..15f45a1 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -382,6 +382,54 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, } } +// We return an EVT that can hold N VTs +// If the VT is a vector, the resulting EVT is a flat vector with the same +// element type as VT's element type. +static EVT getVectorizedVT(EVT VT, unsigned N, LLVMContext &C) { + if (N == 1) + return VT; + + return VT.isVector() ? 
EVT::getVectorVT(C, VT.getScalarType(), + VT.getVectorNumElements() * N) + : EVT::getVectorVT(C, VT, N); +} + +static SDValue getExtractVectorizedValue(SDValue V, unsigned I, EVT VT, + const SDLoc &dl, SelectionDAG &DAG) { + if (V.getValueType() == VT) { + assert(I == 0 && "Index must be 0 for scalar value"); + return V; + } + + if (!VT.isVector()) + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, V, + DAG.getVectorIdxConstant(I, dl)); + + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, V, + DAG.getVectorIdxConstant(I * VT.getVectorNumElements(), dl)); +} + +template <typename T> +static inline SDValue getBuildVectorizedValue(unsigned N, const SDLoc &dl, + SelectionDAG &DAG, T GetElement) { + if (N == 1) + return GetElement(0); + + SmallVector<SDValue, 8> Values; + for (const unsigned I : llvm::seq(N)) { + SDValue Val = GetElement(I); + if (Val.getValueType().isVector()) + DAG.ExtractVectorElements(Val, Values); + else + Values.push_back(Val); + } + + EVT VT = EVT::getVectorVT(*DAG.getContext(), Values[0].getValueType(), + Values.size()); + return DAG.getBuildVector(VT, dl, Values); +} + /// PromoteScalarIntegerPTX /// Used to make sure the arguments/returns are suitable for passing /// and promote them to a larger size if they're not. @@ -420,9 +468,10 @@ static EVT promoteScalarIntegerPTX(const EVT VT) { // parameter starting at index Idx using a single vectorized op of // size AccessSize. If so, it returns the number of param pieces // covered by the vector op. Otherwise, it returns 1. -static unsigned CanMergeParamLoadStoresStartingAt( +template <typename T> +static unsigned canMergeParamLoadStoresStartingAt( unsigned Idx, uint32_t AccessSize, const SmallVectorImpl<EVT> &ValueVTs, - const SmallVectorImpl<uint64_t> &Offsets, Align ParamAlignment) { + const SmallVectorImpl<T> &Offsets, Align ParamAlignment) { // Can't vectorize if param alignment is not sufficient. 
if (ParamAlignment < AccessSize) @@ -472,10 +521,11 @@ static unsigned CanMergeParamLoadStoresStartingAt( // of the same size as ValueVTs indicating how each piece should be // loaded/stored (i.e. as a scalar, or as part of a vector // load/store). +template <typename T> static SmallVector<unsigned, 16> VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, - const SmallVectorImpl<uint64_t> &Offsets, - Align ParamAlignment, bool IsVAArg = false) { + const SmallVectorImpl<T> &Offsets, Align ParamAlignment, + bool IsVAArg = false) { // Set vector size to match ValueVTs and mark all elements as // scalars by default. @@ -486,7 +536,7 @@ VectorizePTXValueVTs(const SmallVectorImpl<EVT> &ValueVTs, const auto GetNumElts = [&](unsigned I) -> unsigned { for (const unsigned AccessSize : {16, 8, 4, 2}) { - const unsigned NumElts = CanMergeParamLoadStoresStartingAt( + const unsigned NumElts = canMergeParamLoadStoresStartingAt( I, AccessSize, ValueVTs, Offsets, ParamAlignment); assert((NumElts == 1 || NumElts == 2 || NumElts == 4) && "Unexpected vectorization size"); @@ -1384,6 +1434,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Type *RetTy = CLI.RetTy; const CallBase *CB = CLI.CB; const DataLayout &DL = DAG.getDataLayout(); + LLVMContext &Ctx = *DAG.getContext(); const auto GetI32 = [&](const unsigned I) { return DAG.getConstant(I, dl, MVT::i32); @@ -1476,15 +1527,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const SDValue ParamSymbol = getCallParamSymbol(DAG, IsVAArg ? FirstVAArg : ArgI, MVT::i32); - SmallVector<EVT, 16> VTs; - SmallVector<uint64_t, 16> Offsets; - assert((!IsByVal || Arg.IndirectType) && "byval arg must have indirect type"); Type *ETy = (IsByVal ? Arg.IndirectType : Arg.Ty); - ComputePTXValueVTs(*this, DL, ETy, VTs, &Offsets, IsByVal ? 
0 : VAOffset); - assert(VTs.size() == Offsets.size() && "Size mismatch"); - assert((IsByVal || VTs.size() == ArgOuts.size()) && "Size mismatch"); const Align ArgAlign = [&]() { if (IsByVal) { @@ -1492,17 +1537,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // so we don't need to worry whether it's naturally aligned or not. // See TargetLowering::LowerCallTo(). const Align InitialAlign = ArgOuts[0].Flags.getNonZeroByValAlign(); - const Align ByValAlign = getFunctionByValParamAlign( - CB->getCalledFunction(), ETy, InitialAlign, DL); - if (IsVAArg) - VAOffset = alignTo(VAOffset, ByValAlign); - return ByValAlign; + return getFunctionByValParamAlign(CB->getCalledFunction(), ETy, + InitialAlign, DL); } return getArgumentAlignment(CB, Arg.Ty, ArgI + 1, DL); }(); - const unsigned TypeSize = DL.getTypeAllocSize(ETy); - assert((!IsByVal || TypeSize == ArgOuts[0].Flags.getByValSize()) && + const unsigned TySize = DL.getTypeAllocSize(ETy); + assert((!IsByVal || TySize == ArgOuts[0].Flags.getByValSize()) && "type size mismatch"); const SDValue ArgDeclare = [&]() { @@ -1510,105 +1552,120 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, return VADeclareParam; if (IsByVal || shouldPassAsArray(Arg.Ty)) - return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TypeSize); + return MakeDeclareArrayParam(ParamSymbol, ArgAlign, TySize); assert(ArgOuts.size() == 1 && "We must pass only one value as non-array"); assert((ArgOuts[0].VT.isInteger() || ArgOuts[0].VT.isFloatingPoint()) && "Only int and float types are supported as non-array arguments"); - return MakeDeclareScalarParam(ParamSymbol, TypeSize); + return MakeDeclareScalarParam(ParamSymbol, TySize); }(); - // PTX Interoperability Guide 3.3(A): [Integer] Values shorter - // than 32-bits are sign extended or zero extended, depending on - // whether they are signed or unsigned types. This case applies - // only to scalar parameters and not to aggregate values. 
- const bool ExtendIntegerParam = - Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32; + if (IsByVal) { + assert(ArgOutVals.size() == 1 && "We must pass only one value as byval"); + SDValue SrcPtr = ArgOutVals[0]; + const auto PointerInfo = refinePtrAS(SrcPtr, DAG, DL, *this); + const Align BaseSrcAlign = ArgOuts[0].Flags.getNonZeroByValAlign(); - const auto GetStoredValue = [&](const unsigned I, EVT EltVT, - const MaybeAlign PartAlign) { - if (IsByVal) { - SDValue Ptr = ArgOutVals[0]; - auto MPI = refinePtrAS(Ptr, DAG, DL, *this); - SDValue SrcAddr = - DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(Offsets[I])); - - return DAG.getLoad(EltVT, dl, CallChain, SrcAddr, MPI, PartAlign); + if (IsVAArg) + VAOffset = alignTo(VAOffset, ArgAlign); + + SmallVector<EVT, 4> ValueVTs, MemVTs; + SmallVector<TypeSize, 4> Offsets; + ComputeValueVTs(*this, DL, ETy, ValueVTs, &MemVTs, &Offsets); + + unsigned J = 0; + const auto VI = VectorizePTXValueVTs(MemVTs, Offsets, ArgAlign, IsVAArg); + for (const unsigned NumElts : VI) { + EVT LoadVT = getVectorizedVT(MemVTs[J], NumElts, Ctx); + Align SrcAlign = commonAlignment(BaseSrcAlign, Offsets[J]); + SDValue SrcAddr = DAG.getObjectPtrOffset(dl, SrcPtr, Offsets[J]); + SDValue SrcLoad = + DAG.getLoad(LoadVT, dl, CallChain, SrcAddr, PointerInfo, SrcAlign); + + TypeSize ParamOffset = Offsets[J].getWithIncrement(VAOffset); + Align ParamAlign = commonAlignment(ArgAlign, ParamOffset); + SDValue ParamAddr = + DAG.getObjectPtrOffset(dl, ParamSymbol, ParamOffset); + SDValue StoreParam = + DAG.getStore(ArgDeclare, dl, SrcLoad, ParamAddr, + MachinePointerInfo(ADDRESS_SPACE_PARAM), ParamAlign); + CallPrereqs.push_back(StoreParam); + + J += NumElts; } - SDValue StVal = ArgOutVals[I]; - assert(promoteScalarIntegerPTX(StVal.getValueType()) == - StVal.getValueType() && - "OutVal type should always be legal"); - - const EVT VTI = promoteScalarIntegerPTX(VTs[I]); - const EVT StoreVT = - ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? 
MVT::i8 : VTI); - - return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl); - }; - - const auto VectorInfo = - VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg); - - unsigned J = 0; - for (const unsigned NumElts : VectorInfo) { - const int CurOffset = Offsets[J]; - const EVT EltVT = promoteScalarIntegerPTX(VTs[J]); - - if (IsVAArg && !IsByVal) - // Align each part of the variadic argument to their type. - VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT)); - - assert((IsVAArg || VAOffset == 0) && - "VAOffset must be 0 for non-VA args"); + if (IsVAArg) + VAOffset += TySize; + } else { + SmallVector<EVT, 16> VTs; + SmallVector<uint64_t, 16> Offsets; + ComputePTXValueVTs(*this, DL, Arg.Ty, VTs, &Offsets, VAOffset); + assert(VTs.size() == Offsets.size() && "Size mismatch"); + assert(VTs.size() == ArgOuts.size() && "Size mismatch"); + + // PTX Interoperability Guide 3.3(A): [Integer] Values shorter + // than 32-bits are sign extended or zero extended, depending on + // whether they are signed or unsigned types. This case applies + // only to scalar parameters and not to aggregate values. + const bool ExtendIntegerParam = + Arg.Ty->isIntegerTy() && DL.getTypeAllocSizeInBits(Arg.Ty) < 32; + + const auto GetStoredValue = [&](const unsigned I) { + SDValue StVal = ArgOutVals[I]; + assert(promoteScalarIntegerPTX(StVal.getValueType()) == + StVal.getValueType() && + "OutVal type should always be legal"); + + const EVT VTI = promoteScalarIntegerPTX(VTs[I]); + const EVT StoreVT = + ExtendIntegerParam ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI); + + return correctParamType(StVal, StoreVT, ArgOuts[I].Flags, DAG, dl); + }; + + unsigned J = 0; + const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign, IsVAArg); + for (const unsigned NumElts : VI) { + const EVT EltVT = promoteScalarIntegerPTX(VTs[J]); + + unsigned Offset; + if (IsVAArg) { + // TODO: We may need to support vector types that can be passed + // as scalars in variadic arguments. 
+ assert(NumElts == 1 && + "Vectorization should be disabled for vaargs."); + + // Align each part of the variadic argument to their type. + VAOffset = alignTo(VAOffset, DAG.getEVTAlign(EltVT)); + Offset = VAOffset; + + const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT; + VAOffset += DL.getTypeAllocSize(TheStoreType.getTypeForEVT(Ctx)); + } else { + assert(VAOffset == 0 && "VAOffset must be 0 for non-VA args"); + Offset = Offsets[J]; + } - const unsigned Offset = - (VAOffset + ((IsVAArg && !IsByVal) ? 0 : CurOffset)); - SDValue Ptr = - DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset)); + SDValue Ptr = + DAG.getObjectPtrOffset(dl, ParamSymbol, TypeSize::getFixed(Offset)); - const MaybeAlign CurrentAlign = ExtendIntegerParam - ? MaybeAlign(std::nullopt) - : commonAlignment(ArgAlign, Offset); + const MaybeAlign CurrentAlign = ExtendIntegerParam + ? MaybeAlign(std::nullopt) + : commonAlignment(ArgAlign, Offset); - SDValue Val; - if (NumElts == 1) { - Val = GetStoredValue(J, EltVT, CurrentAlign); - } else { - SmallVector<SDValue, 8> StoreVals; - for (const unsigned K : llvm::seq(NumElts)) { - SDValue ValJ = GetStoredValue(J + K, EltVT, CurrentAlign); - if (ValJ.getValueType().isVector()) - DAG.ExtractVectorElements(ValJ, StoreVals); - else - StoreVals.push_back(ValJ); - } + SDValue Val = + getBuildVectorizedValue(NumElts, dl, DAG, [&](unsigned K) { + return GetStoredValue(J + K); + }); - EVT VT = EVT::getVectorVT( - *DAG.getContext(), StoreVals[0].getValueType(), StoreVals.size()); - Val = DAG.getBuildVector(VT, dl, StoreVals); - } + SDValue StoreParam = + DAG.getStore(ArgDeclare, dl, Val, Ptr, + MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign); + CallPrereqs.push_back(StoreParam); - SDValue StoreParam = - DAG.getStore(ArgDeclare, dl, Val, Ptr, - MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign); - CallPrereqs.push_back(StoreParam); - - // TODO: We may need to support vector types that can be passed - // as scalars in variadic 
arguments. - if (IsVAArg && !IsByVal) { - assert(NumElts == 1 && - "Vectorization is expected to be disabled for variadics."); - const EVT TheStoreType = ExtendIntegerParam ? MVT::i32 : EltVT; - VAOffset += - DL.getTypeAllocSize(TheStoreType.getTypeForEVT(*DAG.getContext())); + J += NumElts; } - - J += NumElts; } - if (IsVAArg && IsByVal) - VAOffset += TypeSize; } // Handle Result @@ -1676,17 +1733,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallPrereqs.push_back(PrototypeDeclare); } - if (ConvertToIndirectCall) { - // Copy the function ptr to a ptx register and use the register to call the - // function. - const MVT DestVT = Callee.getValueType().getSimpleVT(); - MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - Register DestReg = MRI.createVirtualRegister(TLI.getRegClassFor(DestVT)); - auto RegCopy = DAG.getCopyToReg(DAG.getEntryNode(), dl, DestReg, Callee); - Callee = DAG.getCopyFromReg(RegCopy, dl, DestReg, DestVT); - } - const unsigned Proto = IsIndirectCall ? 
UniqueCallSite : 0; const unsigned NumArgs = std::min<unsigned>(CLI.NumFixedArgs + 1, Args.size()); @@ -1703,10 +1749,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!Ins.empty()) { SmallVector<EVT, 16> VTs; SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets, 0); + ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); assert(VTs.size() == Ins.size() && "Bad value decomposition"); const Align RetAlign = getArgumentAlignment(CB, RetTy, 0, DL); + const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -1714,9 +1761,9 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const bool ExtendIntegerRetVal = RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; - const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); unsigned I = 0; - for (const unsigned NumElts : VectorInfo) { + const auto VI = VectorizePTXValueVTs(VTs, Offsets, RetAlign); + for (const unsigned NumElts : VI) { const MaybeAlign CurrentAlign = ExtendIntegerRetVal ? MaybeAlign(std::nullopt) : commonAlignment(RetAlign, Offsets[I]); @@ -1724,16 +1771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const EVT VTI = promoteScalarIntegerPTX(VTs[I]); const EVT LoadVT = ExtendIntegerRetVal ? MVT::i32 : (VTI == MVT::i1 ? MVT::i8 : VTI); - - const unsigned PackingAmt = - LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1; - - const EVT VecVT = NumElts == 1 ? 
LoadVT - : EVT::getVectorVT(*DAG.getContext(), - LoadVT.getScalarType(), - NumElts * PackingAmt); - - const SDValue RetSymbol = DAG.getExternalSymbol("retval0", MVT::i32); + const EVT VecVT = getVectorizedVT(LoadVT, NumElts, Ctx); SDValue Ptr = DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I])); @@ -1742,17 +1780,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachinePointerInfo(ADDRESS_SPACE_PARAM), CurrentAlign); LoadChains.push_back(R.getValue(1)); - - if (NumElts == 1) - ProxyRegOps.push_back(R); - else - for (const unsigned J : llvm::seq(NumElts)) { - SDValue Elt = DAG.getNode( - LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR - : ISD::EXTRACT_VECTOR_ELT, - dl, LoadVT, R, DAG.getVectorIdxConstant(J * PackingAmt, dl)); - ProxyRegOps.push_back(Elt); - } + for (const unsigned J : llvm::seq(NumElts)) + ProxyRegOps.push_back(getExtractVectorizedValue(R, J, LoadVT, dl, DAG)); I += NumElts; } } @@ -3227,11 +3256,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); const DataLayout &DL = DAG.getDataLayout(); auto PtrVT = getPointerTy(DAG.getDataLayout()); - const Function *F = &MF.getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); SDValue Root = DAG.getRoot(); SmallVector<SDValue, 16> OutChains; @@ -3247,7 +3275,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( // See similar issue in LowerCall. 
auto AllIns = ArrayRef(Ins); - for (const auto &Arg : F->args()) { + for (const auto &Arg : F.args()) { const auto ArgIns = AllIns.take_while( [&](auto I) { return I.OrigArgIndex == Arg.getArgNo(); }); AllIns = AllIns.drop_front(ArgIns.size()); @@ -3287,7 +3315,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( assert(ByvalIn.VT == PtrVT && "ByVal argument must be a pointer"); SDValue P; - if (isKernelFunction(*F)) { + if (isKernelFunction(F)) { P = ArgSymbol; P.getNode()->setIROrder(Arg.getArgNo() + 1); } else { @@ -3305,43 +3333,27 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( assert(VTs.size() == Offsets.size() && "Size mismatch"); const Align ArgAlign = getFunctionArgumentAlignment( - F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL); + &F, Ty, Arg.getArgNo() + AttributeList::FirstArgIndex, DL); - const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); unsigned I = 0; - for (const unsigned NumElts : VectorInfo) { + const auto VI = VectorizePTXValueVTs(VTs, Offsets, ArgAlign); + for (const unsigned NumElts : VI) { // i1 is loaded/stored as i8 const EVT LoadVT = VTs[I] == MVT::i1 ? MVT::i8 : VTs[I]; - // If the element is a packed type (ex. v2f16, v4i8, etc) holding - // multiple elements. - const unsigned PackingAmt = - LoadVT.isVector() ? LoadVT.getVectorNumElements() : 1; - - const EVT VecVT = - NumElts == 1 - ? 
LoadVT - : EVT::getVectorVT(F->getContext(), LoadVT.getScalarType(), - NumElts * PackingAmt); + const EVT VecVT = getVectorizedVT(LoadVT, NumElts, *DAG.getContext()); SDValue VecAddr = DAG.getObjectPtrOffset( dl, ArgSymbol, TypeSize::getFixed(Offsets[I])); - const MaybeAlign PartAlign = commonAlignment(ArgAlign, Offsets[I]); + const Align PartAlign = commonAlignment(ArgAlign, Offsets[I]); SDValue P = DAG.getLoad(VecVT, dl, Root, VecAddr, MachinePointerInfo(ADDRESS_SPACE_PARAM), PartAlign, MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); - if (P.getNode()) - P.getNode()->setIROrder(Arg.getArgNo() + 1); + P.getNode()->setIROrder(Arg.getArgNo() + 1); for (const unsigned J : llvm::seq(NumElts)) { - SDValue Elt = - NumElts == 1 - ? P - : DAG.getNode(LoadVT.isVector() ? ISD::EXTRACT_SUBVECTOR - : ISD::EXTRACT_VECTOR_ELT, - dl, LoadVT, P, - DAG.getVectorIdxConstant(J * PackingAmt, dl)); + SDValue Elt = getExtractVectorizedValue(P, J, LoadVT, dl, DAG); Elt = correctParamType(Elt, ArgIns[I + J].VT, ArgIns[I + J].Flags, DAG, dl); @@ -3364,9 +3376,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - const MachineFunction &MF = DAG.getMachineFunction(); - const Function &F = MF.getFunction(); - Type *RetTy = MF.getFunction().getReturnType(); + const Function &F = DAG.getMachineFunction().getFunction(); + Type *RetTy = F.getReturnType(); if (RetTy->isVoidTy()) { assert(OutVals.empty() && Outs.empty() && "Return value expected for void"); @@ -3374,10 +3385,9 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, } const DataLayout &DL = DAG.getDataLayout(); - SmallVector<EVT, 16> VTs; - SmallVector<uint64_t, 16> Offsets; - ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); - assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); + + const SDValue RetSymbol = 
DAG.getExternalSymbol("func_retval0", MVT::i32); + const auto RetAlign = getFunctionParamOptimizedAlign(&F, RetTy, DL); // PTX Interoperability Guide 3.3(A): [Integer] Values shorter than // 32-bits are sign extended or zero extended, depending on whether @@ -3385,6 +3395,11 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const bool ExtendIntegerRetVal = RetTy->isIntegerTy() && DL.getTypeAllocSizeInBits(RetTy) < 32; + SmallVector<EVT, 16> VTs; + SmallVector<uint64_t, 16> Offsets; + ComputePTXValueVTs(*this, DL, RetTy, VTs, &Offsets); + assert(VTs.size() == OutVals.size() && "Bad return value decomposition"); + const auto GetRetVal = [&](unsigned I) -> SDValue { SDValue RetVal = OutVals[I]; assert(promoteScalarIntegerPTX(RetVal.getValueType()) == @@ -3397,33 +3412,16 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, return correctParamType(RetVal, StoreVT, Outs[I].Flags, DAG, dl); }; - const auto RetAlign = getFunctionParamOptimizedAlign(&F, RetTy, DL); - const auto VectorInfo = VectorizePTXValueVTs(VTs, Offsets, RetAlign); unsigned I = 0; - for (const unsigned NumElts : VectorInfo) { + const auto VI = VectorizePTXValueVTs(VTs, Offsets, RetAlign); + for (const unsigned NumElts : VI) { const MaybeAlign CurrentAlign = ExtendIntegerRetVal ? 
MaybeAlign(std::nullopt) : commonAlignment(RetAlign, Offsets[I]); - SDValue Val; - if (NumElts == 1) { - Val = GetRetVal(I); - } else { - SmallVector<SDValue, 4> StoreVals; - for (const unsigned J : llvm::seq(NumElts)) { - SDValue ValJ = GetRetVal(I + J); - if (ValJ.getValueType().isVector()) - DAG.ExtractVectorElements(ValJ, StoreVals); - else - StoreVals.push_back(ValJ); - } - - EVT VT = EVT::getVectorVT(F.getContext(), StoreVals[0].getValueType(), - StoreVals.size()); - Val = DAG.getBuildVector(VT, dl, StoreVals); - } + SDValue Val = getBuildVectorizedValue( + NumElts, dl, DAG, [&](unsigned K) { return GetRetVal(I + K); }); - const SDValue RetSymbol = DAG.getExternalSymbol("func_retval0", MVT::i32); SDValue Ptr = DAG.getObjectPtrOffset(dl, RetSymbol, TypeSize::getFixed(Offsets[I])); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index d8047d3..2ae7520 100644 --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -1602,8 +1602,6 @@ foreach is_convergent = [0, 1] in { } defvar call_inst = !cast<NVPTXInst>("CALL" # convergent_suffix); - def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, globaladdr:$addr, imm:$proto), - (call_inst (to_tglobaladdr $addr), imm:$rets, imm:$params, imm:$proto)>; def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i32:$addr, imm:$proto), (call_inst $addr, imm:$rets, imm:$params, imm:$proto)>; def : Pat<(call is_convergent, 1, imm:$rets, imm:$params, i64:$addr, imm:$proto), @@ -1612,10 +1610,6 @@ foreach is_convergent = [0, 1] in { defvar call_uni_inst = !cast<NVPTXInst>("CALL_UNI" # convergent_suffix); def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, globaladdr:$addr, 0), (call_uni_inst (to_tglobaladdr $addr), imm:$rets, imm:$params)>; - def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i32:$addr, 0), - (call_uni_inst $addr, imm:$rets, imm:$params)>; - def : Pat<(call is_convergent, 0, imm:$rets, imm:$params, i64:$addr, 
0), - (call_uni_inst $addr, imm:$rets, imm:$params)>; } def DECLARE_PARAM_array : diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 459525e..f179873 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -7296,9 +7296,17 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX( if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() && ValVT.isInteger() && ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) { - SDValue ArgValueTrunc = DAG.getNode( - ISD::TRUNCATE, dl, ArgVT.getSimpleVT() == MVT::i1 ? MVT::i8 : ArgVT, - ArgValue); + // It is possible to have either real integer values + // or integers that were not originally integers. + // In the latter case, these could have came from structs, + // and these integers would not have an extend on the parameter. + // Since these types of integers do not have an extend specified + // in the first place, the type of extend that we do should not matter. + EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1 + ? MVT::i8 + : ArgVT; + SDValue ArgValueTrunc = + DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue); SDValue ArgValueExt = ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT) : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT); diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp index 5eb1f01..b7e2263 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -100,10 +100,14 @@ bool PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, // This is a best effort to set things up for a post-RA pass. Optimizations // like generating loads of multiple registers should ideally be done within // the scheduler pass by combining the loads during DAG postprocessing. - const ClusterInfo *CandCluster = Cand.AtTop ? 
TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -189,10 +193,14 @@ bool PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, return TryCand.Reason != NoCand; // Keep clustered nodes together. - const ClusterInfo *CandCluster = Cand.AtTop ? TopCluster : BotCluster; - const ClusterInfo *TryCandCluster = TryCand.AtTop ? TopCluster : BotCluster; - if (tryGreater(TryCandCluster && TryCandCluster->contains(TryCand.SU), - CandCluster && CandCluster->contains(Cand.SU), TryCand, Cand, + unsigned CandZoneCluster = Cand.AtTop ? TopClusterID : BotClusterID; + unsigned TryCandZoneCluster = TryCand.AtTop ? 
TopClusterID : BotClusterID; + bool CandIsClusterSucc = + isTheSameCluster(CandZoneCluster, Cand.SU->ParentClusterIdx); + bool TryCandIsClusterSucc = + isTheSameCluster(TryCandZoneCluster, TryCand.SU->ParentClusterIdx); + + if (tryGreater(TryCandIsClusterSucc, CandIsClusterSucc, TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 82e3b5c..9538b20 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -901,7 +901,7 @@ void RISCVAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned Offset = Fixup.getOffset(); unsigned NumBytes = alignTo(Info.TargetSize + Info.TargetOffset, 8) / 8; - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f223fdbe..5998653 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2827,6 +2827,8 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL, static bool isWorthFoldingAdd(SDValue Add) { for (auto *User : Add->users()) { if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE && + User->getOpcode() != RISCVISD::LD_RV32 && + User->getOpcode() != RISCVISD::SD_RV32 && User->getOpcode() != ISD::ATOMIC_LOAD && User->getOpcode() != ISD::ATOMIC_STORE) return false; @@ -2841,6 +2843,9 @@ static bool isWorthFoldingAdd(SDValue Add) { if (User->getOpcode() == ISD::ATOMIC_STORE && cast<AtomicSDNode>(User)->getVal() == Add) return false; + if (User->getOpcode() == RISCVISD::SD_RV32 && + (User->getOperand(0) == Add || User->getOperand(1) == Add)) + return false; if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering())) return false; } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c0ada51..adbfbeb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1819,6 +1819,13 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::riscv_seg6_load_mask: case Intrinsic::riscv_seg7_load_mask: case Intrinsic::riscv_seg8_load_mask: + case Intrinsic::riscv_sseg2_load_mask: + case Intrinsic::riscv_sseg3_load_mask: + case Intrinsic::riscv_sseg4_load_mask: + case Intrinsic::riscv_sseg5_load_mask: + case Intrinsic::riscv_sseg6_load_mask: + case Intrinsic::riscv_sseg7_load_mask: + case Intrinsic::riscv_sseg8_load_mask: return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false, /*IsUnitStrided*/ false, /*UsePtrVal*/ true); case Intrinsic::riscv_seg2_store_mask: @@ -10938,6 +10945,97 @@ static inline SDValue getVCIXISDNodeVOID(SDValue &Op, 
SelectionDAG &DAG, return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands); } +static SDValue +lowerFixedVectorSegLoadIntrinsics(unsigned IntNo, SDValue Op, + const RISCVSubtarget &Subtarget, + SelectionDAG &DAG) { + bool IsStrided; + switch (IntNo) { + case Intrinsic::riscv_seg2_load_mask: + case Intrinsic::riscv_seg3_load_mask: + case Intrinsic::riscv_seg4_load_mask: + case Intrinsic::riscv_seg5_load_mask: + case Intrinsic::riscv_seg6_load_mask: + case Intrinsic::riscv_seg7_load_mask: + case Intrinsic::riscv_seg8_load_mask: + IsStrided = false; + break; + case Intrinsic::riscv_sseg2_load_mask: + case Intrinsic::riscv_sseg3_load_mask: + case Intrinsic::riscv_sseg4_load_mask: + case Intrinsic::riscv_sseg5_load_mask: + case Intrinsic::riscv_sseg6_load_mask: + case Intrinsic::riscv_sseg7_load_mask: + case Intrinsic::riscv_sseg8_load_mask: + IsStrided = true; + break; + default: + llvm_unreachable("unexpected intrinsic ID"); + }; + + static const Intrinsic::ID VlsegInts[7] = { + Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask, + Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask, + Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask, + Intrinsic::riscv_vlseg8_mask}; + static const Intrinsic::ID VlssegInts[7] = { + Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask, + Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask, + Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask, + Intrinsic::riscv_vlsseg8_mask}; + + SDLoc DL(Op); + unsigned NF = Op->getNumValues() - 1; + assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); + MVT XLenVT = Subtarget.getXLenVT(); + MVT VT = Op->getSimpleValueType(0); + MVT ContainerVT = ::getContainerForFixedLengthVector(DAG, VT, Subtarget); + unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * + ContainerVT.getScalarSizeInBits(); + EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); + + // Operands: (chain, int_id, pointer, mask, vl) or + // (chain, int_id, pointer, 
offset, mask, vl) + SDValue VL = Op.getOperand(Op.getNumOperands() - 1); + SDValue Mask = Op.getOperand(Op.getNumOperands() - 2); + MVT MaskVT = Mask.getSimpleValueType(); + MVT MaskContainerVT = + ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); + Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); + + SDValue IntID = DAG.getTargetConstant( + IsStrided ? VlssegInts[NF - 2] : VlsegInts[NF - 2], DL, XLenVT); + auto *Load = cast<MemIntrinsicSDNode>(Op); + + SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other}); + SmallVector<SDValue, 9> Ops = { + Load->getChain(), + IntID, + DAG.getUNDEF(VecTupTy), + Op.getOperand(2), + Mask, + VL, + DAG.getTargetConstant( + RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT), + DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; + // Insert the stride operand. + if (IsStrided) + Ops.insert(std::next(Ops.begin(), 4), Op.getOperand(3)); + + SDValue Result = + DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, + Load->getMemoryVT(), Load->getMemOperand()); + SmallVector<SDValue, 9> Results; + for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) { + SDValue SubVec = DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT, + Result.getValue(0), + DAG.getTargetConstant(RetIdx, DL, MVT::i32)); + Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget)); + } + Results.push_back(Result.getValue(1)); + return DAG.getMergeValues(Results, DL); +} + SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = Op.getConstantOperandVal(1); @@ -10950,57 +11048,16 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::riscv_seg5_load_mask: case Intrinsic::riscv_seg6_load_mask: case Intrinsic::riscv_seg7_load_mask: - case Intrinsic::riscv_seg8_load_mask: { - SDLoc DL(Op); - static const Intrinsic::ID VlsegInts[7] = { - Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask, - 
Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask, - Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask, - Intrinsic::riscv_vlseg8_mask}; - unsigned NF = Op->getNumValues() - 1; - assert(NF >= 2 && NF <= 8 && "Unexpected seg number"); - MVT XLenVT = Subtarget.getXLenVT(); - MVT VT = Op->getSimpleValueType(0); - MVT ContainerVT = getContainerForFixedLengthVector(VT); - unsigned Sz = NF * ContainerVT.getVectorMinNumElements() * - ContainerVT.getScalarSizeInBits(); - EVT VecTupTy = MVT::getRISCVVectorTupleVT(Sz, NF); - - // Operands: (chain, int_id, pointer, mask, vl) - SDValue VL = Op.getOperand(Op.getNumOperands() - 1); - SDValue Mask = Op.getOperand(3); - MVT MaskVT = Mask.getSimpleValueType(); - MVT MaskContainerVT = - ::getContainerForFixedLengthVector(DAG, MaskVT, Subtarget); - Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget); - - SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT); - auto *Load = cast<MemIntrinsicSDNode>(Op); + case Intrinsic::riscv_seg8_load_mask: + case Intrinsic::riscv_sseg2_load_mask: + case Intrinsic::riscv_sseg3_load_mask: + case Intrinsic::riscv_sseg4_load_mask: + case Intrinsic::riscv_sseg5_load_mask: + case Intrinsic::riscv_sseg6_load_mask: + case Intrinsic::riscv_sseg7_load_mask: + case Intrinsic::riscv_sseg8_load_mask: + return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG); - SDVTList VTs = DAG.getVTList({VecTupTy, MVT::Other}); - SDValue Ops[] = { - Load->getChain(), - IntID, - DAG.getUNDEF(VecTupTy), - Op.getOperand(2), - Mask, - VL, - DAG.getTargetConstant( - RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC, DL, XLenVT), - DAG.getTargetConstant(Log2_64(VT.getScalarSizeInBits()), DL, XLenVT)}; - SDValue Result = - DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, - Load->getMemoryVT(), Load->getMemOperand()); - SmallVector<SDValue, 9> Results; - for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++) { - SDValue SubVec = 
DAG.getNode(RISCVISD::TUPLE_EXTRACT, DL, ContainerVT, - Result.getValue(0), - DAG.getTargetConstant(RetIdx, DL, MVT::i32)); - Results.push_back(convertFromScalableVector(VT, SubVec, DAG, Subtarget)); - } - Results.push_back(Result.getValue(1)); - return DAG.getMergeValues(Results, DL); - } case Intrinsic::riscv_sf_vc_v_x_se: return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE); case Intrinsic::riscv_sf_vc_v_i_se: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 31ea2de..cc2977c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -910,7 +910,7 @@ foreach vti = AllIntegerVectors in { foreach vti = I64IntegerVectors in { let Predicates = [HasVInstructionsI64] in { def : Pat<(add (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat_imm64_neg i64:$rs2))), + (vti.Vector (SplatPat_imm64_neg (i64 GPR:$rs2)))), (!cast<Instruction>("PseudoVSUB_VX_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 695223b..acbccdd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2123,7 +2123,7 @@ foreach vti = AllIntegerVectors in { foreach vti = I64IntegerVectors in { let Predicates = [HasVInstructionsI64] in { def : Pat<(riscv_add_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat_imm64_neg i64:$rs2)), + (vti.Vector (SplatPat_imm64_neg (i64 GPR:$rs2))), vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), (!cast<Instruction>("PseudoVSUB_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td index c0f7ab1..4c31ce4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td +++ 
b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td @@ -590,12 +590,12 @@ let Predicates = [HasVendorXTHeadBb, IsRV64] in { def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>; def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2), (TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>; -def : Pat<(sra (bswap i64:$rs1), (i64 32)), - (TH_REVW i64:$rs1)>; -def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)), - (TH_REVW i64:$rs1)>; -def : Pat<(riscv_clzw i64:$rs1), - (TH_FF0 (i64 (SLLI (i64 (XORI i64:$rs1, -1)), 32)))>; +def : Pat<(i64 (sra (bswap GPR:$rs1), (i64 32))), + (TH_REVW GPR:$rs1)>; +def : Pat<(binop_allwusers<srl> (bswap GPR:$rs1), (i64 32)), + (TH_REVW GPR:$rs1)>; +def : Pat<(riscv_clzw GPR:$rs1), + (TH_FF0 (i64 (SLLI (i64 (XORI GPR:$rs1, -1)), 32)))>; } // Predicates = [HasVendorXTHeadBb, IsRV64] let Predicates = [HasVendorXTHeadBs] in { @@ -697,11 +697,13 @@ def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{ }], uimm2_4_XFORM>; let Predicates = [HasVendorXTHeadMemPair, IsRV64] in { -def : Pat<(th_lwud i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>; -def : Pat<(th_ldd i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>; +def : Pat<(th_lwud GPR:$rs1, (i64 uimm2_3:$uimm2_3)), + (TH_LWUD GPR:$rs1, uimm2_3:$uimm2_3, 3)>; +def : Pat<(th_ldd GPR:$rs1, (i64 uimm2_4:$uimm2_4)), + (TH_LDD GPR:$rs1, uimm2_4:$uimm2_4, 4)>; -def : Pat<(th_sdd i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4), - (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>; +def : Pat<(th_sdd (i64 GPR:$rd1), GPR:$rd2, GPR:$rs1, uimm2_4:$uimm2_4), + (TH_SDD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_4:$uimm2_4, 4)>; } let Predicates = [HasVendorXTHeadMemPair] in { diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 6ec7544..25cdf72 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -148,6 +148,7 @@ struct ConvertBuiltin { bool IsSaturated; bool IsRounded; bool IsBfloat16; + bool 
IsTF32; FPRoundingMode::FPRoundingMode RoundingMode; }; @@ -230,6 +231,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall, // - "__spirv_SubgroupImageMediaBlockReadINTEL" // - "__spirv_SubgroupImageMediaBlockWriteINTEL" // - "__spirv_Convert" + // - "__spirv_Round" // - "__spirv_UConvert" // - "__spirv_SConvert" // - "__spirv_FConvert" @@ -242,7 +244,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall, "SDotKHR|SUDotKHR|SDotAccSatKHR|UDotAccSatKHR|SUDotAccSatKHR|" "ReadClockKHR|SubgroupBlockReadINTEL|SubgroupImageBlockReadINTEL|" "SubgroupImageMediaBlockReadINTEL|SubgroupImageMediaBlockWriteINTEL|" - "Convert|" + "Convert|Round|" "UConvert|SConvert|FConvert|SatConvert)[^_]*)(_R[^_]*_?(\\w+)?.*)?"); std::smatch Match; if (std::regex_match(BuiltinName, Match, SpvWithR) && Match.size() > 1) { @@ -697,7 +699,8 @@ static bool buildAtomicStoreInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { if (Call->isSpirvOp()) - return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call, Register(0)); + return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call, + Register(0)); Register ScopeRegister = buildConstantIntReg32(SPIRV::Scope::Device, MIRBuilder, GR); @@ -2677,8 +2680,20 @@ static bool generateConvertInst(const StringRef DemangledCall, } } else if (GR->isScalarOrVectorOfType(Call->ReturnRegister, SPIRV::OpTypeFloat)) { - // Float -> Float - Opcode = SPIRV::OpFConvert; + if (Builtin->IsTF32) { + const auto *ST = static_cast<const SPIRVSubtarget *>( + &MIRBuilder.getMF().getSubtarget()); + if (!ST->canUseExtension( + SPIRV::Extension::SPV_INTEL_tensor_float32_conversion)) + NeedExtMsg = "SPV_INTEL_tensor_float32_conversion"; + IsRightComponentsNumber = + GR->getScalarOrVectorComponentCount(Call->Arguments[0]) == + GR->getScalarOrVectorComponentCount(Call->ReturnRegister); + Opcode = SPIRV::OpRoundFToTF32INTEL; + } else { + // Float -> Float + Opcode = SPIRV::OpFConvert; + } } } diff --git 
a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index ea78dcd..d08560b 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -1461,6 +1461,8 @@ class ConvertBuiltin<string name, InstructionSet set> { bit IsRounded = !not(!eq(!find(name, "_rt"), -1)); bit IsBfloat16 = !or(!not(!eq(!find(name, "BF16"), -1)), !not(!eq(!find(name, "bfloat16"), -1))); + bit IsTF32 = !or(!not(!eq(!find(name, "TF32"), -1)), + !not(!eq(!find(name, "tensor_float32"), -1))); FPRoundingMode RoundingMode = !cond(!not(!eq(!find(name, "_rte"), -1)) : RTE, !not(!eq(!find(name, "_rtz"), -1)) : RTZ, !not(!eq(!find(name, "_rtp"), -1)) : RTP, @@ -1472,7 +1474,7 @@ class ConvertBuiltin<string name, InstructionSet set> { def ConvertBuiltins : GenericTable { let FilterClass = "ConvertBuiltin"; let Fields = ["Name", "Set", "IsDestinationSigned", "IsSaturated", - "IsRounded", "IsBfloat16", "RoundingMode"]; + "IsRounded", "IsBfloat16", "IsTF32", "RoundingMode"]; string TypeOf_Set = "InstructionSet"; string TypeOf_RoundingMode = "FPRoundingMode"; } @@ -1556,6 +1558,25 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in { def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>; } +// cl_intel_tensor_float32_conversions / SPV_INTEL_tensor_float32_conversion +// Multiclass used to define at the same time both a demangled builtin record +// and a corresponding convert builtin record. +multiclass DemangledTF32RoundBuiltin<string name1, string name2> { + // Create records for scalar and vector conversions. 
+ foreach i = ["", "2", "3", "4", "8", "16"] in { + def : DemangledBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std, Convert, 1, 1>; + def : ConvertBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std>; + } +} + +defm : DemangledTF32RoundBuiltin<"tensor_float32", "_as_float">; +defm : DemangledTF32RoundBuiltin<"as_tensor_float32", "_float">; + +foreach conv = ["FToTF32INTEL"] in { + def : DemangledBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std, Convert, 1, 1>; + def : ConvertBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std>; +} + //===----------------------------------------------------------------------===// // Class defining a vector data load/store builtin record used for lowering // into OpExtInst instruction. diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 2726203..d9265f4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -102,7 +102,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>> SPIRV::Extension::Extension::SPV_INTEL_2d_block_io}, {"SPV_INTEL_int4", SPIRV::Extension::Extension::SPV_INTEL_int4}, {"SPV_KHR_float_controls2", - SPIRV::Extension::Extension::SPV_KHR_float_controls2}}; + SPIRV::Extension::Extension::SPV_KHR_float_controls2}, + {"SPV_INTEL_tensor_float32_conversion", + SPIRV::Extension::Extension::SPV_INTEL_tensor_float32_conversion}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 3c631ce..947b574 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -194,6 +194,42 @@ class SPIRVEmitIntrinsics void useRoundingMode(ConstrainedFPIntrinsic *FPI, IRBuilder<> &B); + // Tries to walk the type accessed by the given GEP instruction. 
+ // For each nested type access, one of the 2 callbacks is called: + // - OnLiteralIndexing when the index is a known constant value. + // Parameters: + // PointedType: the pointed type resulting from this indexing. + // If the parent type is an array, this is the index in the array. + // If the parent type is a struct, this is the field index. + // Index: index of the element in the parent type. + // - OnDynamicIndexing when the index is a non-constant value. + // This callback is only called when indexing into an array. + // Parameters: + // ElementType: the type of the elements stored in the parent array. + // Offset: the Value* containing the byte offset into the array. + // Return true if an error occurred during the walk, false otherwise. + bool walkLogicalAccessChain( + GetElementPtrInst &GEP, + const std::function<void(Type *PointedType, uint64_t Index)> + &OnLiteralIndexing, + const std::function<void(Type *ElementType, Value *Offset)> + &OnDynamicIndexing); + + // Returns the type accessed using the given GEP instruction by relying + // on the GEP type. + // FIXME: GEP types are not supposed to be used to retrieve the pointed + // type. This must be fixed. + Type *getGEPType(GetElementPtrInst *GEP); + + // Returns the type accessed using the given GEP instruction by walking + // the source type using the GEP indices. + // FIXME: without help from the frontend, this method cannot reliably retrieve + // the stored type, nor can it robustly determine the depth of the type + // we are accessing. + Type *getGEPTypeLogical(GetElementPtrInst *GEP); + + Instruction *buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP); + public: static char ID; SPIRVEmitIntrinsics(SPIRVTargetMachine *TM = nullptr) @@ -246,6 +282,17 @@ bool expectIgnoredInIRTranslation(const Instruction *I) { } } +// Returns the source pointer from `I` ignoring intermediate ptrcast. 
+Value *getPointerRoot(Value *I) { + if (auto *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::spv_ptrcast) { + Value *V = II->getArgOperand(0); + return getPointerRoot(V); + } + } + return I; +} + } // namespace char SPIRVEmitIntrinsics::ID = 0; @@ -555,7 +602,112 @@ void SPIRVEmitIntrinsics::maybeAssignPtrType(Type *&Ty, Value *Op, Type *RefTy, Ty = RefTy; } -Type *getGEPType(GetElementPtrInst *Ref) { +bool SPIRVEmitIntrinsics::walkLogicalAccessChain( + GetElementPtrInst &GEP, + const std::function<void(Type *, uint64_t)> &OnLiteralIndexing, + const std::function<void(Type *, Value *)> &OnDynamicIndexing) { + // We only rewrite i8* GEP. Others should be left as-is. + // Valid i8* GEP must always have a single index. + assert(GEP.getSourceElementType() == + IntegerType::getInt8Ty(CurrF->getContext())); + assert(GEP.getNumIndices() == 1); + + auto &DL = CurrF->getDataLayout(); + Value *Src = getPointerRoot(GEP.getPointerOperand()); + Type *CurType = deduceElementType(Src, true); + + Value *Operand = *GEP.idx_begin(); + ConstantInt *CI = dyn_cast<ConstantInt>(Operand); + if (!CI) { + ArrayType *AT = dyn_cast<ArrayType>(CurType); + // Operand is not constant. Either we have an array and accept it, or we + // give up. 
+ if (AT) + OnDynamicIndexing(AT->getElementType(), Operand); + return AT == nullptr; + } + + assert(CI); + uint64_t Offset = CI->getZExtValue(); + + do { + if (ArrayType *AT = dyn_cast<ArrayType>(CurType)) { + uint32_t EltTypeSize = DL.getTypeSizeInBits(AT->getElementType()) / 8; + assert(Offset < AT->getNumElements() * EltTypeSize); + uint64_t Index = Offset / EltTypeSize; + Offset = Offset - (Index * EltTypeSize); + CurType = AT->getElementType(); + OnLiteralIndexing(CurType, Index); + } else if (StructType *ST = dyn_cast<StructType>(CurType)) { + uint32_t StructSize = DL.getTypeSizeInBits(ST) / 8; + assert(Offset < StructSize); + (void)StructSize; + const auto &STL = DL.getStructLayout(ST); + unsigned Element = STL->getElementContainingOffset(Offset); + Offset -= STL->getElementOffset(Element); + CurType = ST->getElementType(Element); + OnLiteralIndexing(CurType, Element); + } else { + // Vector type indexing should not use GEP. + // So if we have an index left, something is wrong. Giving up. 
+ return true; + } + } while (Offset > 0); + + return false; +} + +Instruction * +SPIRVEmitIntrinsics::buildLogicalAccessChainFromGEP(GetElementPtrInst &GEP) { + auto &DL = CurrF->getDataLayout(); + IRBuilder<> B(GEP.getParent()); + B.SetInsertPoint(&GEP); + + std::vector<Value *> Indices; + Indices.push_back(ConstantInt::get( + IntegerType::getInt32Ty(CurrF->getContext()), 0, /* Signed= */ false)); + walkLogicalAccessChain( + GEP, + [&Indices, &B](Type *EltType, uint64_t Index) { + Indices.push_back( + ConstantInt::get(B.getInt64Ty(), Index, /* Signed= */ false)); + }, + [&Indices, &B, &DL](Type *EltType, Value *Offset) { + uint32_t EltTypeSize = DL.getTypeSizeInBits(EltType) / 8; + Value *Index = B.CreateUDiv( + Offset, ConstantInt::get(Offset->getType(), EltTypeSize, + /* Signed= */ false)); + Indices.push_back(Index); + }); + + SmallVector<Type *, 2> Types = {GEP.getType(), GEP.getOperand(0)->getType()}; + SmallVector<Value *, 4> Args; + Args.push_back(B.getInt1(GEP.isInBounds())); + Args.push_back(GEP.getOperand(0)); + llvm::append_range(Args, Indices); + auto *NewI = B.CreateIntrinsic(Intrinsic::spv_gep, {Types}, {Args}); + replaceAllUsesWithAndErase(B, &GEP, NewI); + return NewI; +} + +Type *SPIRVEmitIntrinsics::getGEPTypeLogical(GetElementPtrInst *GEP) { + + Type *CurType = GEP->getResultElementType(); + + bool Interrupted = walkLogicalAccessChain( + *GEP, [&CurType](Type *EltType, uint64_t Index) { CurType = EltType; }, + [&CurType](Type *EltType, Value *Index) { CurType = EltType; }); + + return Interrupted ? 
GEP->getResultElementType() : CurType; +} + +Type *SPIRVEmitIntrinsics::getGEPType(GetElementPtrInst *Ref) { + if (Ref->getSourceElementType() == + IntegerType::getInt8Ty(CurrF->getContext()) && + TM->getSubtargetImpl()->isLogicalSPIRV()) { + return getGEPTypeLogical(Ref); + } + Type *Ty = nullptr; // TODO: not sure if GetElementPtrInst::getTypeAtIndex() does anything // useful here @@ -1395,6 +1547,13 @@ Instruction *SPIRVEmitIntrinsics::visitSwitchInst(SwitchInst &I) { } Instruction *SPIRVEmitIntrinsics::visitGetElementPtrInst(GetElementPtrInst &I) { + if (I.getSourceElementType() == IntegerType::getInt8Ty(CurrF->getContext()) && + TM->getSubtargetImpl()->isLogicalSPIRV()) { + Instruction *Result = buildLogicalAccessChainFromGEP(I); + if (Result) + return Result; + } + IRBuilder<> B(I.getParent()); B.SetInsertPoint(&I); SmallVector<Type *, 2> Types = {I.getType(), I.getOperand(0)->getType()}; @@ -1588,7 +1747,24 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, } if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { Value *Pointer = GEPI->getPointerOperand(); - Type *OpTy = GEPI->getSourceElementType(); + Type *OpTy = nullptr; + + // Knowing the accessed type is mandatory for logical SPIR-V. Sadly, + // the GEP source element type should not be used for this purpose, and + // the alternative type-scavenging method is not working. + // Physical SPIR-V can work around this, but not logical, hence still + // try to rely on the broken type scavenging for logical. + bool IsRewrittenGEP = + GEPI->getSourceElementType() == IntegerType::getInt8Ty(I->getContext()); + if (IsRewrittenGEP && TM->getSubtargetImpl()->isLogicalSPIRV()) { + Value *Src = getPointerRoot(Pointer); + OpTy = GR->findDeducedElementType(Src); + } + + // In all cases, fall back to the GEP type if type scavenging failed. 
+ if (!OpTy) + OpTy = GEPI->getSourceElementType(); + replacePointerOperandWithPtrCast(I, Pointer, OpTy, 0, B); if (isNestedPointer(OpTy)) insertTodoType(Pointer); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 049ba02..f0b938d 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -445,6 +445,9 @@ def OpCrossWorkgroupCastToPtrINTEL : UnOp<"OpCrossWorkgroupCastToPtrINTEL", 5938 def OpConvertFToBF16INTEL : UnOp<"OpConvertFToBF16INTEL", 6116>; def OpConvertBF16ToFINTEL : UnOp<"OpConvertBF16ToFINTEL", 6117>; +// SPV_INTEL_tensor_float32_conversion +def OpRoundFToTF32INTEL : UnOp<"OpRoundFToTF32INTEL", 6426>; + // 3.42.12 Composite Instructions def OpVectorExtractDynamic: Op<77, (outs ID:$res), (ins TYPE:$type, vID:$vec, ID:$idx), diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index ad976e5..0cd9d78 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1564,6 +1564,13 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::BFloat16ConversionINTEL); } break; + case SPIRV::OpRoundFToTF32INTEL: + if (ST.canUseExtension( + SPIRV::Extension::SPV_INTEL_tensor_float32_conversion)) { + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion); + Reqs.addCapability(SPIRV::Capability::TensorFloat32RoundingINTEL); + } + break; case SPIRV::OpVariableLengthArrayINTEL: case SPIRV::OpSaveMemoryINTEL: case SPIRV::OpRestoreMemoryINTEL: diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index 548e9b7..614e83a 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -320,6 +320,7 @@ defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>; defm SPV_INTEL_2d_block_io : 
ExtensionOperand<122>; defm SPV_INTEL_int4 : ExtensionOperand<123>; defm SPV_KHR_float_controls2 : ExtensionOperand<124>; +defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -529,6 +530,7 @@ defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; defm Int4TypeINTEL : CapabilityOperand<5112, 0, 0, [SPV_INTEL_int4], []>; defm Int4CooperativeMatrixINTEL : CapabilityOperand<5114, 0, 0, [SPV_INTEL_int4], [Int4TypeINTEL, CooperativeMatrixKHR]>; +defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp index d5f8492..b2cfd04 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp @@ -165,7 +165,7 @@ void SystemZMCAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned BitSize = getFixupKindInfo(Kind).TargetSize; unsigned Size = (BitSize + 7) / 8; - assert(Offset + Size <= Data.size() && "Invalid fixup offset!"); + assert(Offset + Size <= F.getSize() && "Invalid fixup offset!"); // Big-endian insertion of Size bytes. 
Value = extractBitsForFixup(Kind, Value, Fixup, getContext()); diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp index f987621..b02b6af 100644 --- a/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp +++ b/llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp @@ -174,7 +174,7 @@ void VEAsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the bits // from the fixup value. The Value has been "split up" into the // appropriate bitfields above. diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index 837fd8e..84eb15f 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -97,7 +97,7 @@ void WebAssemblyAsmBackend::applyFixup(const MCFragment &F, Value <<= Info.TargetOffset; unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); + assert(Offset + NumBytes <= F.getSize() && "Invalid fixup offset!"); // For each byte of the fragment that the fixup touches, mask in the // bits from the fixup value. 
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 7f9d474..1efef83 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -690,7 +690,7 @@ void X86AsmBackend::applyFixup(const MCFragment &F, const MCFixup &Fixup, return; unsigned Size = getFixupKindSize(Kind); - assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!"); + assert(Fixup.getOffset() + Size <= F.getSize() && "Invalid fixup offset!"); int64_t SignedValue = static_cast<int64_t>(Value); if (IsResolved && Fixup.isPCRel()) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 3c24d2e..01da012 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -13404,7 +13404,7 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { return indicatePessimisticFixpoint(); if (BinSize == 0) { - auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false)); + auto NewAllocationSize = std::make_optional<TypeSize>(0, false); if (!changeAllocationSize(NewAllocationSize)) return ChangeStatus::UNCHANGED; return ChangeStatus::CHANGED; @@ -13422,8 +13422,7 @@ struct AAAllocationInfoImpl : public AAAllocationInfo { if (SizeOfBin >= *AllocationSize) return indicatePessimisticFixpoint(); - auto NewAllocationSize = - std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false)); + auto NewAllocationSize = std::make_optional<TypeSize>(SizeOfBin * 8, false); if (!changeAllocationSize(NewAllocationSize)) return ChangeStatus::UNCHANGED; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index c009c1e..b8c99f1 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -99,6 +99,9 @@ 
STATISTIC(SkippedCallsCloning, "Number of calls skipped during cloning due to unexpected operand"); STATISTIC(MismatchedCloneAssignments, "Number of callsites assigned to call multiple non-matching clones"); +STATISTIC(TotalMergeInvokes, "Number of merge invocations for nodes"); +STATISTIC(TotalMergeIters, "Number of merge iterations for nodes"); +STATISTIC(MaxMergeIters, "Max merge iterations for nodes"); static cl::opt<std::string> DotFilePathPrefix( "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, @@ -109,6 +112,11 @@ static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false), cl::Hidden, cl::desc("Export graph to dot files.")); +// TODO: Remove this option once new handling is validated more widely. +static cl::opt<bool> DoMergeIteration( + "memprof-merge-iteration", cl::init(true), cl::Hidden, + cl::desc("Iteratively apply merging on a node to catch new callers")); + // How much of the graph to export to dot. enum DotScope { All, // The full CCG graph. @@ -3995,7 +4003,7 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const { void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) { - auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction(); + auto *CurF = getCalleeFunc(CallerCall.call()); auto NewCalleeCloneNo = CalleeFunc.cloneNo(); if (isMemProfClone(*CurF)) { // If we already assigned this callsite to call a specific non-default @@ -4191,16 +4199,36 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::mergeClones( if (!Inserted.second) return; - // Make a copy since the recursive call may move a caller edge to a new - // callee, messing up the iterator. - auto CallerEdges = Node->CallerEdges; - for (auto CallerEdge : CallerEdges) { - // Skip any caller edge moved onto a different callee during recursion. 
- if (CallerEdge->Callee != Node) - continue; - mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode); + // Iteratively perform merging on this node to handle new caller nodes created + // during the recursive traversal. We could do something more elegant such as + // maintain a worklist, but this is a simple approach that doesn't cause a + // measureable compile time effect, as most nodes don't have many caller + // edges to check. + bool FoundUnvisited = true; + unsigned Iters = 0; + while (FoundUnvisited) { + Iters++; + FoundUnvisited = false; + // Make a copy since the recursive call may move a caller edge to a new + // callee, messing up the iterator. + auto CallerEdges = Node->CallerEdges; + for (auto CallerEdge : CallerEdges) { + // Skip any caller edge moved onto a different callee during recursion. + if (CallerEdge->Callee != Node) + continue; + // If we found an unvisited caller, note that we should check the caller + // edges again as mergeClones may add or change caller nodes. + if (DoMergeIteration && !Visited.contains(CallerEdge->Caller)) + FoundUnvisited = true; + mergeClones(CallerEdge->Caller, Visited, ContextIdToAllocationNode); + } } + TotalMergeInvokes++; + TotalMergeIters += Iters; + if (Iters > MaxMergeIters) + MaxMergeIters = Iters; + // Merge for this node after we handle its callers. 
mergeNodeCalleeClones(Node, Visited, ContextIdToAllocationNode); } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 00b877b..fe0f308 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -462,6 +462,13 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) { return ScalarPHI; } + // If SrcVec is a subvector starting at index 0, extract from the + // wider source vector + Value *V; + if (match(SrcVec, + m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero()))) + return ExtractElementInst::Create(V, Index); + // TODO come up with a n-ary matcher that subsumes both unary and // binary matchers. UnaryOperator *UO; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 56358b1..5ee3bb1 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2011,12 +2011,17 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN, NewPN->addIncoming(NewPhiValues[i], PN->getIncomingBlock(i)); if (IdenticalUsers) { - for (User *U : make_early_inc_range(PN->users())) { + // Collect and deduplicate users up-front to avoid iterator invalidation. 
+ SmallSetVector<Instruction *, 4> ToReplace; + for (User *U : PN->users()) { Instruction *User = cast<Instruction>(U); if (User == &I) continue; - replaceInstUsesWith(*User, NewPN); - eraseInstFromFunction(*User); + ToReplace.insert(User); + } + for (Instruction *I : ToReplace) { + replaceInstUsesWith(*I, NewPN); + eraseInstFromFunction(*I); } OneUse = true; } @@ -2654,9 +2659,18 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, APInt NewOffset = TypeSize * *C2 + *C1; if (NewOffset.isZero() || (Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) { + GEPNoWrapFlags Flags = GEPNoWrapFlags::none(); + if (GEP.hasNoUnsignedWrap() && + cast<GEPOperator>(Src)->hasNoUnsignedWrap() && + match(GEP.getOperand(1), m_NUWAddLike(m_Value(), m_Value()))) { + Flags |= GEPNoWrapFlags::noUnsignedWrap(); + if (GEP.isInBounds() && cast<GEPOperator>(Src)->isInBounds()) + Flags |= GEPNoWrapFlags::inBounds(); + } + Value *GEPConst = - IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset)); - return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex); + IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset), "", Flags); + return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex, Flags); } return nullptr; diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 68094c3..c3f80f9 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -2508,6 +2508,12 @@ static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo, if (!GEP) return false; + // Do not try to hoist a constant GEP out of the loop via reassociation. + // Constant GEPs can often be folded into addressing modes, and reassociating + // them may inhibit CSE of a common base. 
+ if (GEP->hasAllConstantIndices()) + return false; + auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); if (!Src || !Src->hasOneUse() || !L.contains(Src)) return false; diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp index f3e992c..04039b8 100644 --- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp @@ -1009,7 +1009,8 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM, // in simplified form, and also needs LCSSA. Running // this pass will simplify all loops that contain inner loops, // regardless of whether anything ends up being flattened. - LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr); + LoopAccessInfoManager LAIM(AR.SE, AR.AA, AR.DT, AR.LI, &AR.TTI, nullptr, + &AR.AC); for (Loop *InnerLoop : LN.getLoops()) { auto *OuterLoop = InnerLoop->getParentLoop(); if (!OuterLoop) diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 4f2bfb0..448dc2b 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -551,7 +551,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, const Function *F = L.getHeader()->getParent(); OptimizationRemarkEmitter ORE(F); - LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr); + LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr, nullptr, &LAR.AC); if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 320b792..6ffe841 100644 --- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -79,8 +79,7 @@ // 
ld.global.f32 %f4, [%rl6+132]; // much better // // Another improvement enabled by the LowerGEP flag is to lower a GEP with -// multiple indices to either multiple GEPs with a single index or arithmetic -// operations (depending on whether the target uses alias analysis in codegen). +// multiple indices to multiple GEPs with a single index. // Such transformation can have following benefits: // (1) It can always extract constants in the indices of structure type. // (2) After such Lowering, there are more optimization opportunities such as @@ -88,59 +87,33 @@ // // E.g. The following GEPs have multiple indices: // BB1: -// %p = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 3 +// %p = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 3 // load %p // ... // BB2: -// %p2 = getelementptr [10 x %struct]* %ptr, i64 %i, i64 %j1, i32 2 +// %p2 = getelementptr [10 x %struct], ptr %ptr, i64 %i, i64 %j1, i32 2 // load %p2 // ... // // We can not do CSE to the common part related to index "i64 %i". Lowering // GEPs can achieve such goals. -// If the target does not use alias analysis in codegen, this pass will -// lower a GEP with multiple indices into arithmetic operations: -// BB1: -// %1 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity -// %3 = add i64 %1, %2 ; CSE opportunity -// %4 = mul i64 %j1, length_of_struct -// %5 = add i64 %3, %4 -// %6 = add i64 %3, struct_field_3 ; Constant offset -// %p = inttoptr i64 %6 to i32* -// load %p -// ... -// BB2: -// %7 = ptrtoint [10 x %struct]* %ptr to i64 ; CSE opportunity -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity -// %9 = add i64 %7, %8 ; CSE opportunity -// %10 = mul i64 %j2, length_of_struct -// %11 = add i64 %9, %10 -// %12 = add i64 %11, struct_field_2 ; Constant offset -// %p = inttoptr i64 %12 to i32* -// load %p2 -// ... 
// -// If the target uses alias analysis in codegen, this pass will lower a GEP -// with multiple indices into multiple GEPs with a single index: +// This pass will lower a GEP with multiple indices into multiple GEPs with a +// single index: // BB1: -// %1 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity -// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity -// %3 = getelementptr i8* %1, i64 %2 ; CSE opportunity +// %2 = mul i64 %i, length_of_10xstruct ; CSE opportunity +// %3 = getelementptr i8, ptr %ptr, i64 %2 ; CSE opportunity // %4 = mul i64 %j1, length_of_struct -// %5 = getelementptr i8* %3, i64 %4 -// %6 = getelementptr i8* %5, struct_field_3 ; Constant offset -// %p = bitcast i8* %6 to i32* +// %5 = getelementptr i8, ptr %3, i64 %4 +// %p = getelementptr i8, ptr %5, struct_field_3 ; Constant offset // load %p // ... // BB2: -// %7 = bitcast [10 x %struct]* %ptr to i8* ; CSE opportunity -// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity -// %9 = getelementptr i8* %7, i64 %8 ; CSE opportunity +// %8 = mul i64 %i, length_of_10xstruct ; CSE opportunity +// %9 = getelementptr i8, ptr %ptr, i64 %8 ; CSE opportunity // %10 = mul i64 %j2, length_of_struct -// %11 = getelementptr i8* %9, i64 %10 -// %12 = getelementptr i8* %11, struct_field_2 ; Constant offset -// %p2 = bitcast i8* %12 to i32* +// %11 = getelementptr i8, ptr %9, i64 %10 +// %p2 = getelementptr i8, ptr %11, struct_field_2 ; Constant offset // load %p2 // ... // @@ -408,16 +381,6 @@ private: void lowerToSingleIndexGEPs(GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset); - /// Lower a GEP with multiple indices into ptrtoint+arithmetics+inttoptr form. - /// Function splitGEP already split the original GEP into a variadic part and - /// a constant offset (i.e., AccumulativeByteOffset). This function lowers the - /// variadic part into a set of arithmetic operations and applies - /// AccumulativeByteOffset to it. - /// \p Variadic The variadic part of the original GEP. 
- /// \p AccumulativeByteOffset The constant offset. - void lowerToArithmetics(GetElementPtrInst *Variadic, - int64_t AccumulativeByteOffset); - /// Finds the constant offset within each index and accumulates them. If /// LowerGEP is true, it finds in indices of both sequential and structure /// types, otherwise it only finds in sequential indices. The output @@ -951,55 +914,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Variadic->eraseFromParent(); } -void -SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic, - int64_t AccumulativeByteOffset) { - IRBuilder<> Builder(Variadic); - Type *IntPtrTy = DL->getIntPtrType(Variadic->getType()); - assert(IntPtrTy == DL->getIndexType(Variadic->getType()) && - "Pointer type must match index type for arithmetic-based lowering of " - "split GEPs"); - - Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy); - gep_type_iterator GTI = gep_type_begin(*Variadic); - // Create ADD/SHL/MUL arithmetic operations for each sequential indices. We - // don't create arithmetics for structure indices, as they are accumulated - // in the constant offset index. - for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) { - if (GTI.isSequential()) { - Value *Idx = Variadic->getOperand(I); - // Skip zero indices. - if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) - if (CI->isZero()) - continue; - - APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(), - GTI.getSequentialElementStride(*DL)); - // Scale the index by element size. - if (ElementSize != 1) { - if (ElementSize.isPowerOf2()) { - Idx = Builder.CreateShl( - Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2())); - } else { - Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize)); - } - } - // Create an ADD for each index. - ResultPtr = Builder.CreateAdd(ResultPtr, Idx); - } - } - - // Create an ADD for the constant offset index. 
- if (AccumulativeByteOffset != 0) { - ResultPtr = Builder.CreateAdd( - ResultPtr, ConstantInt::get(IntPtrTy, AccumulativeByteOffset)); - } - - ResultPtr = Builder.CreateIntToPtr(ResultPtr, Variadic->getType()); - Variadic->replaceAllUsesWith(ResultPtr); - Variadic->eraseFromParent(); -} - bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP, TargetTransformInfo &TTI) { auto PtrGEP = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()); @@ -1091,8 +1005,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Notice that we don't remove struct field indices here. If LowerGEP is // disabled, a structure index is not accumulated and we still use the old // one. If LowerGEP is enabled, a structure index is accumulated in the - // constant offset. LowerToSingleIndexGEPs or lowerToArithmetics will later - // handle the constant offset and won't need a new structure index. + // constant offset. LowerToSingleIndexGEPs will later handle the constant + // offset and won't need a new structure index. gep_type_iterator GTI = gep_type_begin(*GEP); for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { if (GTI.isSequential()) { @@ -1167,22 +1081,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { GEP->setNoWrapFlags(NewGEPFlags); - // Lowers a GEP to either GEPs with a single index or arithmetic operations. + // Lowers a GEP to GEPs with a single index. if (LowerGEP) { - // As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to - // arithmetic operations if the target uses alias analysis in codegen. - // Additionally, pointers that aren't integral (and so can't be safely - // converted to integers) or those whose offset size is different from their - // pointer size (which means that doing integer arithmetic on them could - // affect that data) can't be lowered in this way. 
- unsigned AddrSpace = GEP->getPointerAddressSpace(); - bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) != - DL->getIndexSizeInBits(AddrSpace); - if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) || - PointerHasExtraData) - lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); - else - lowerToArithmetics(GEP, AccumulativeByteOffset); + lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset); return true; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 969d225..c47fd942 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1665,13 +1665,12 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { // Keep a record of all the exiting blocks. SmallVector<const SCEVPredicate *, 4> Predicates; - std::optional<std::pair<BasicBlock *, BasicBlock *>> SingleUncountableEdge; + BasicBlock *SingleUncountableExitingBlock = nullptr; for (BasicBlock *BB : ExitingBlocks) { const SCEV *EC = PSE.getSE()->getPredicatedExitCount(TheLoop, BB, &Predicates); if (isa<SCEVCouldNotCompute>(EC)) { - SmallVector<BasicBlock *, 2> Succs(successors(BB)); - if (Succs.size() != 2) { + if (size(successors(BB)) != 2) { reportVectorizationFailure( "Early exiting block does not have exactly two successors", "Incorrect number of successors from early exiting block", @@ -1679,15 +1678,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { return false; } - BasicBlock *ExitBlock; - if (!TheLoop->contains(Succs[0])) - ExitBlock = Succs[0]; - else { - assert(!TheLoop->contains(Succs[1])); - ExitBlock = Succs[1]; - } - - if (SingleUncountableEdge) { + if (SingleUncountableExitingBlock) { reportVectorizationFailure( "Loop has too many uncountable exits", "Cannot vectorize early exit loop with more than one early exit", @@ -1695,7 +1686,7 @@ bool 
LoopVectorizationLegality::isVectorizableEarlyExitLoop() { return false; } - SingleUncountableEdge = {BB, ExitBlock}; + SingleUncountableExitingBlock = BB; } else CountableExitingBlocks.push_back(BB); } @@ -1705,7 +1696,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { // PSE.getSymbolicMaxBackedgeTakenCount() below. Predicates.clear(); - if (!SingleUncountableEdge) { + if (!SingleUncountableExitingBlock) { LLVM_DEBUG(dbgs() << "LV: Cound not find any uncountable exits"); return false; } @@ -1713,7 +1704,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { // The only supported early exit loops so far are ones where the early // exiting block is a unique predecessor of the latch block. BasicBlock *LatchPredBB = LatchBB->getUniquePredecessor(); - if (LatchPredBB != SingleUncountableEdge->first) { + if (LatchPredBB != SingleUncountableExitingBlock) { reportVectorizationFailure("Early exit is not the latch predecessor", "Cannot vectorize early exit loop", "EarlyExitNotLatchPredecessor", ORE, TheLoop); @@ -1766,7 +1757,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { } // The vectoriser cannot handle loads that occur after the early exit block. - assert(LatchBB->getUniquePredecessor() == SingleUncountableEdge->first && + assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock && "Expected latch predecessor to be the early exiting block"); // TODO: Handle loops that may fault. 
@@ -1789,7 +1780,7 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { LLVM_DEBUG(dbgs() << "LV: Found an early exit loop with symbolic max " "backedge taken count: " << *SymbolicMaxBTC << '\n'); - UncountableEdge = SingleUncountableEdge; + UncountableExitingBB = SingleUncountableExitingBlock; return true; } @@ -1861,7 +1852,8 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return false; } else { if (!isVectorizableEarlyExitLoop()) { - UncountableEdge = std::nullopt; + assert(!hasUncountableEarlyExit() && + "Must be false without vectorizable early-exit loop"); if (DoExtraAnalysis) Result = false; else diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 850c4a1..b4ea70e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1624,7 +1624,7 @@ private: /// presence of a cost for an instruction in the mapping indicates that the /// instruction will be scalarized when vectorizing with the associated /// vectorization factor. The entries are VF-ScalarCostTy pairs. - DenseMap<ElementCount, ScalarCostsTy> InstsToScalarize; + MapVector<ElementCount, ScalarCostsTy> InstsToScalarize; /// Holds the instructions known to be uniform after vectorization. /// The data is collected per VF. 
@@ -9788,6 +9788,10 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, match( P.getIncomingValueForBlock(EPI.MainLoopIterationCountCheck), m_SpecificInt(0)) && + any_of(P.incoming_values(), + [&EPI](Value *Inc) { + return Inc == EPI.VectorTripCount; + }) && all_of(P.incoming_values(), [&EPI](Value *Inc) { return Inc == EPI.VectorTripCount || match(Inc, m_SpecificInt(0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index d249a34..11b4677 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3548,6 +3548,8 @@ void VPInterleaveRecipe::execute(VPTransformState &State) { // Vectorize the interleaved store group. Value *MaskForGaps = createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group); + assert(((MaskForGaps != nullptr) == NeedsMaskForGaps) && + "Mismatch between NeedsMaskForGaps and MaskForGaps"); assert((!MaskForGaps || !State.VF.isScalable()) && "masking gaps for scalable vectors is not yet supported."); ArrayRef<VPValue *> StoredValues = getStoredValues(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 18d331b..fcbc86f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1413,7 +1413,7 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan, static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan, ElementCount BestVF, unsigned BestUF, ScalarEvolution &SE) { - if (match(Cond, m_Binary<Instruction::Or>(m_VPValue(), m_VPValue()))) + if (match(Cond, m_BinaryOr(m_VPValue(), m_VPValue()))) return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF, &SE](VPValue *C) { return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE); @@ -2563,7 +2563,8 @@ void VPlanTransforms::createInterleaveGroups( } bool NeedsMaskForGaps = - 
IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed; + (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) || + (!StoredValues.empty() && !IG->isFull()); Instruction *IRInsertPos = IG->getInsertPos(); auto *InsertPos = diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index b824f53..a579eb3 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -377,6 +377,144 @@ define void @maximum() { ret void } +define void @minimumnum() { +; CHECK-LABEL: 'minimumnum' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.minimumnum.f32(float poison, float poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.minimumnum.f64(double poison, double poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 
for: ret void +; + call float @llvm.minimumnum.f32(float poison, float poison) + call double @llvm.minimumnum.f64(double poison, double poison) + call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) + call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) + call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) + call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) + call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) + call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) + call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) + call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) + ret void +} + +define void @minimumnum_fp16() { +; CHECK-NOF16-LABEL: 'minimumnum_fp16' +; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-F16-LABEL: 'minimumnum_fp16' +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison) +; CHECK-F16-NEXT: Cost 
Model: Found costs of 3 for: %2 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call half @llvm.minimumnum.f32(half poison, half poison) + call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) + call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) + call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) + call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) + ret void +} + +define void @minimumnum_bf16() { +; CHECK-LABEL: 'minimumnum_bf16' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.minimumnum.bf16(bfloat poison, bfloat poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call 
bfloat @llvm.minimumnum.f32(bfloat poison, bfloat poison) + call <2 x bfloat> @llvm.minimumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison) + call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) + call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) + call <16 x bfloat> @llvm.minimumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison) + ret void +} + +define void @maximumnum() { +; CHECK-LABEL: 'maximumnum' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.maximumnum.f32(float poison, float poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.maximumnum.f64(double poison, double poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call float @llvm.maximumnum.f32(float poison, float poison) + call double 
@llvm.maximumnum.f64(double poison, double poison) + call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) + call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) + call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) + call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) + call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison) + call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison) + call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison) + call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison) + ret void +} + +define void @maximumnum_fp16() { +; CHECK-NOF16-LABEL: 'maximumnum_fp16' +; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-F16-LABEL: 'maximumnum_fp16' +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, 
<2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call half @llvm.maximumnum.f32(half poison, half poison) + call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) + call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) + call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) + call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) + ret void +} + +define void @maximumnum_bf16() { +; CHECK-LABEL: 'maximumnum_bf16' +; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.maximumnum.bf16(bfloat poison, bfloat poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call bfloat @llvm.maximumnum.f32(bfloat poison, bfloat poison) + call <2 x bfloat> 
@llvm.maximumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison) + call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) + call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) + call <16 x bfloat> @llvm.maximumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison) + ret void +} + declare i8 @llvm.umin.i8(i8, i8) declare i16 @llvm.umin.i16(i16, i16) declare i32 @llvm.umin.i32(i32, i32) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll index 829ce12..5d11133 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll @@ -255,6 +255,89 @@ define void @maximum() { ret void } +define void @minimumnum() { +; CHECK-LABEL: 'minimumnum' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double> poison, 
<vscale x 8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <vscale x 2 x float> @llvm.minimumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison) + call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison) + call <vscale x 8 x float> 
@llvm.minimumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison) + call <vscale x 16 x float> @llvm.minimumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison) + call <vscale x 2 x double> @llvm.minimumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison) + call <vscale x 4 x double> @llvm.minimumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison) + call <vscale x 8 x double> @llvm.minimumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison) + call <vscale x 16 x double> @llvm.minimumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison) + ret void + call <vscale x 2 x half> @llvm.minimumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison) + call <vscale x 4 x half> @llvm.minimumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison) + call <vscale x 8 x half> @llvm.minimumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison) + call <vscale x 16 x half> @llvm.minimumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison) + call <vscale x 2 x bfloat> @llvm.minimumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) + call <vscale x 4 x bfloat> @llvm.minimumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + call <vscale x 8 x bfloat> @llvm.minimumnum(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) + call <vscale x 16 x bfloat> @llvm.minimumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) + ret void +} + +define void @maximumnum() { +; CHECK-LABEL: 'maximumnum' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.maximumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.maximumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.maximumnum.nxv8f32(<vscale x 8 x float> 
poison, <vscale x 8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.maximumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.maximumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.maximumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.maximumnum.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.maximumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call <vscale x 2 x half> @llvm.maximumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call <vscale x 4 x half> @llvm.maximumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call <vscale x 8 x half> @llvm.maximumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call <vscale x 16 x half> @llvm.maximumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call <vscale x 2 x bfloat> @llvm.maximumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call <vscale x 4 x bfloat> @llvm.maximumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x 
bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call <vscale x 8 x bfloat> @llvm.maximumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call <vscale x 16 x bfloat> @llvm.maximumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <vscale x 2 x float> @llvm.maximumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison) + call <vscale x 4 x float> @llvm.maximumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison) + call <vscale x 8 x float> @llvm.maximumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison) + call <vscale x 16 x float> @llvm.maximumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison) + call <vscale x 2 x double> @llvm.maximumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison) + call <vscale x 4 x double> @llvm.maximumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison) + call <vscale x 8 x double> @llvm.maximumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison) + call <vscale x 16 x double> @llvm.maximumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison) + ret void + call <vscale x 2 x half> @llvm.maximumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison) + call <vscale x 4 x half> @llvm.maximumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison) + call <vscale x 8 x half> @llvm.maximumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison) + call <vscale x 16 x half> @llvm.maximumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison) + call <vscale x 2 x bfloat> @llvm.maximumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) + call <vscale x 4 x bfloat> @llvm.maximumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison) + call <vscale x 8 x bfloat> @llvm.maximumnum(<vscale x 8 x bfloat> 
poison, <vscale x 8 x bfloat> poison) + call <vscale x 16 x bfloat> @llvm.maximumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison) + ret void +} + + declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>) declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>) declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>) diff --git a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll index 1dc8d4a7..207a44d 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/early-exit-runtime-checks.ll @@ -505,7 +505,7 @@ e.1: ret i32 1 } -define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) { +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption(ptr %A, ptr %B) nosync nofree { ; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_known_deref_via_assumption' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Memory dependences are safe with run-time checks @@ -518,10 +518,10 @@ define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_kno ; CHECK-NEXT: %gep.A = getelementptr inbounds i32, ptr %A, i64 %iv ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: -; CHECK-NEXT: (Low: %B High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %B High: (2000 + %B)) ; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop.header> ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %A High: inttoptr (i64 -1 to ptr)) +; CHECK-NEXT: (Low: %A High: (2000 + %A)) ; CHECK-NEXT: Member: {%A,+,4}<nuw><%loop.header> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
@@ -565,7 +565,7 @@ e.2: ret void } -define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) { +define void @all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small(ptr %A, ptr %B) nosync nofree { ; CHECK-LABEL: 'all_exits_dominate_latch_countable_exits_at_most_500_iterations_deref_via_assumption_too_small' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Memory dependences are safe with run-time checks diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll index d93d521..d896a1b 100644 --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll @@ -1,16 +1,38 @@ -; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -aa-pipeline=tbaa,basic-aa -passes=gvn -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMDEP +; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MEMSSA target datalayout = "e-p:64:64:64" ; GVN should ignore the store to p1 to see that the load from p is ; fully redundant. 
-; CHECK: @yes -; CHECK: if.then: -; CHECK-NEXT: store i32 0, ptr %q -; CHECK-NEXT: ret void - define void @yes(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind { +; CHECK-MEMDEP-LABEL: define void @yes( +; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK-MEMDEP: [[IF_THEN]]: +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4 +; CHECK-MEMDEP-NEXT: ret void +; CHECK-MEMDEP: [[IF_ELSE]]: +; CHECK-MEMDEP-NEXT: ret void +; +; CHECK-MEMSSA-LABEL: define void @yes( +; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK-MEMSSA: [[IF_THEN]]: +; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA0]] +; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4 +; CHECK-MEMSSA-NEXT: ret void +; CHECK-MEMSSA: [[IF_ELSE]]: +; CHECK-MEMSSA-NEXT: ret void +; entry: store i32 0, ptr %p, !tbaa !1 store i32 1, ptr %p1, !tbaa !2 @@ -30,16 +52,22 @@ if.else: ; the other type could be unified with the first type, however for now, GVN ; should just be conservative. 
-; CHECK: @watch_out_for_type_change -; CHECK: if.then: -; CHECK: %t = load i32, ptr %p -; CHECK: store i32 %t, ptr %q -; CHECK: ret void -; CHECK: if.else: -; CHECK: %u = load i32, ptr %p -; CHECK: store i32 %u, ptr %q - define void @watch_out_for_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind { +; CHECK-LABEL: define void @watch_out_for_type_change( +; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: store i32 [[T]], ptr [[Q]], align 4 +; CHECK-NEXT: ret void +; CHECK: [[IF_ELSE]]: +; CHECK-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8:![0-9]+]] +; CHECK-NEXT: store i32 [[U]], ptr [[Q]], align 4 +; CHECK-NEXT: ret void +; entry: store i32 0, ptr %p, !tbaa !1 store i32 1, ptr %p1, !tbaa !2 @@ -59,15 +87,36 @@ if.else: ; As before, but the types are swapped. This time GVN does managed to ; eliminate one of the loads before noticing the type mismatch. 
-; CHECK: @watch_out_for_another_type_change -; CHECK: if.then: -; CHECK: store i32 0, ptr %q -; CHECK: ret void -; CHECK: if.else: -; CHECK: %u = load i32, ptr %p -; CHECK: store i32 %u, ptr %q - define void @watch_out_for_another_type_change(i1 %c, ptr %p, ptr %p1, ptr %q) nounwind { +; CHECK-MEMDEP-LABEL: define void @watch_out_for_another_type_change( +; CHECK-MEMDEP-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { +; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]] +; CHECK-MEMDEP-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]] +; CHECK-MEMDEP-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK-MEMDEP: [[IF_THEN]]: +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[Q]], align 4 +; CHECK-MEMDEP-NEXT: ret void +; CHECK-MEMDEP: [[IF_ELSE]]: +; CHECK-MEMDEP-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]] +; CHECK-MEMDEP-NEXT: store i32 [[U]], ptr [[Q]], align 4 +; CHECK-MEMDEP-NEXT: ret void +; +; CHECK-MEMSSA-LABEL: define void @watch_out_for_another_type_change( +; CHECK-MEMSSA-SAME: i1 [[C:%.*]], ptr [[P:%.*]], ptr [[P1:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { +; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[P]], align 4, !tbaa [[TBAA0]] +; CHECK-MEMSSA-NEXT: store i32 1, ptr [[P1]], align 4, !tbaa [[TBAA3]] +; CHECK-MEMSSA-NEXT: br i1 [[C]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]] +; CHECK-MEMSSA: [[IF_THEN]]: +; CHECK-MEMSSA-NEXT: [[T:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA8]] +; CHECK-MEMSSA-NEXT: store i32 [[T]], ptr [[Q]], align 4 +; CHECK-MEMSSA-NEXT: ret void +; CHECK-MEMSSA: [[IF_ELSE]]: +; CHECK-MEMSSA-NEXT: [[U:%.*]] = load i32, ptr [[P]], align 4, !tbaa [[TBAA5]] +; CHECK-MEMSSA-NEXT: store i32 [[U]], ptr [[Q]], align 4 +; CHECK-MEMSSA-NEXT: ret void +; entry: store i32 0, ptr %p, !tbaa !1 store i32 1, ptr %p1, !tbaa !2 @@ -94,3 +143,26 @@ if.else: !7 = !{!"outer space", !9} !8 = !{!"brick red", 
!5} !9 = !{!"observable universe"} +;. +; CHECK-MEMDEP: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-MEMDEP: [[META1]] = !{!"red", [[META2:![0-9]+]]} +; CHECK-MEMDEP: [[META2]] = !{} +; CHECK-MEMDEP: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK-MEMDEP: [[META4]] = !{!"blu", [[META2]]} +; CHECK-MEMDEP: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK-MEMDEP: [[META6]] = !{!"outer space", [[META7:![0-9]+]]} +; CHECK-MEMDEP: [[META7]] = !{!"observable universe"} +; CHECK-MEMDEP: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK-MEMDEP: [[META9]] = !{!"brick red", [[META1]]} +;. +; CHECK-MEMSSA: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK-MEMSSA: [[META1]] = !{!"red", [[META2:![0-9]+]]} +; CHECK-MEMSSA: [[META2]] = !{} +; CHECK-MEMSSA: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +; CHECK-MEMSSA: [[META4]] = !{!"blu", [[META2]]} +; CHECK-MEMSSA: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK-MEMSSA: [[META6]] = !{!"outer space", [[META7:![0-9]+]]} +; CHECK-MEMSSA: [[META7]] = !{!"observable universe"} +; CHECK-MEMSSA: [[TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +; CHECK-MEMSSA: [[META9]] = !{!"brick red", [[META1]]} +;. 
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll index 10c656a..30b74cb 100644 --- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll +++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/intrinsics.ll @@ -796,6 +796,41 @@ define amdgpu_kernel void @v_permlane32_swap(ptr addrspace(1) %out, i32 %src0, i ret void } +; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2) +define amdgpu_kernel void @v_permlane_bcast_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { + %result= call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2) + store i32 %result, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2) +define amdgpu_kernel void @v_permlane_up_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { + %result= call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %src1, i32 %src2) + store i32 %result, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2) +define amdgpu_kernel void @v_permlane_down_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { + %result= call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2) + store i32 %result, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2) +define amdgpu_kernel void @v_permlane_xor_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { + %result= call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2) + store i32 %result, ptr addrspace(1) %out + ret void +} + +; CHECK: DIVERGENT: %result = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1) +define amdgpu_kernel void @v_permlane_idx_gen_b32(ptr addrspace(1) %out, i32 %src0, i32 %src1) { + %result= call i32 
@llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1) + store i32 %result, ptr addrspace(1) %out + ret void +} + ; CHECK: DIVERGENT: %v = call i32 @llvm.amdgcn.dead.i32() define amdgpu_cs_chain void @dead(ptr addrspace(1) %out) { %v = call i32 @llvm.amdgcn.dead.i32() diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll index 578038b..d9cdac4 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -1,8 +1,8 @@ ; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s -; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s -; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s -; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s -; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s +; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s +; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s +; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s +; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-IR %s target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" target triple = "aarch64" @@ -38,24 +38,12 @@ if.end: ; preds = %if.then, %entry ; CHECK-NOT: madd ; CHECK:ldr -; CHECK-NoAA-LABEL: @test_GEP_CSE( -; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint ptr %string to i64 -; CHECK-NoAA: 
[[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 -; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]] -; CHECK-NoAA: add i64 [[PTR2]], 23052 -; CHECK-NoAA: inttoptr -; CHECK-NoAA: if.then: -; CHECK-NoAA-NOT: ptrtoint -; CHECK-NoAA-NOT: mul -; CHECK-NoAA: add i64 [[PTR2]], 23048 -; CHECK-NoAA: inttoptr - -; CHECK-UseAA-LABEL: @test_GEP_CSE( -; CHECK-UseAA: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 -; CHECK-UseAA: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]] -; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23052 -; CHECK-UseAA: if.then: -; CHECK-UseAA: getelementptr i8, ptr [[PTR1]], i64 23048 +; CHECK-IR-LABEL: @test_GEP_CSE( +; CHECK-IR: [[IDX:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 +; CHECK-IR: [[PTR1:%[a-zA-Z0-9]+]] = getelementptr i8, ptr %string, i64 [[IDX]] +; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23052 +; CHECK-IR: if.then: +; CHECK-IR: getelementptr i8, ptr [[PTR1]], i64 23048 %class.my = type { i32, [128 x i32], i32, [256 x %struct.pt]} %struct.pt = type { ptr, i32, i32 } diff --git a/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll b/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll new file mode 100644 index 0000000..4a601f1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64ec-available-externally.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple arm64ec-windows-msvc -o - %s | FileCheck %s + +; Arm64EC Regression Test: The Arm64EC Call Lowering was placing "available +; externally" items in COMDATs, which is not permitted by the module verifier. + +define available_externally float @f() { +entry: + ret float 0x0 +} + +define i32 @caller() { +entry: + call float @f() + ret i32 0 +} + +; Normal function gets an entry thunk, but not an exit thunk. +; CHECK-DAG: $ientry_thunk$cdecl$i8$v: +; CHECK-NOT: $iexit_thunk$cdecl$i8$v: + +; Available Externally function gets an exit thunk, but not an entry thunk. 
+; CHECK-DAG: $iexit_thunk$cdecl$f$v: +; CHECK-DAG: "#f$exit_thunk": +; CHECK-NOT: $ientry_thunk$cdecl$f$v: diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll index 880bd29..d67aa08 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll @@ -14,20 +14,19 @@ target triple = "aarch64" define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #100 // =0x64 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: whilelo p1.d, xzr, x8 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: rdvl x10, #2 -; CHECK-NEXT: mov x11, x9 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: zip2 z0.d, z1.d, z1.d -; CHECK-NEXT: zip1 z1.d, z1.d, z1.d +; CHECK-NEXT: mov x11, x9 ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: zip2 p2.d, p1.d, p1.d -; CHECK-NEXT: mov z6.d, z1.d -; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: mov z6.d, z0.d +; CHECK-NEXT: mov z7.d, z1.d ; CHECK-NEXT: zip1 p1.d, p1.d, p1.d ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #1, mul vl] ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x1, #1, mul vl] @@ -39,14 +38,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 -; CHECK-NEXT: mov z0.d, p2/m, z7.d -; CHECK-NEXT: mov z1.d, p1/m, z6.d +; CHECK-NEXT: mov z1.d, p2/m, z7.d +; CHECK-NEXT: mov z0.d, p1/m, z6.d ; CHECK-NEXT: whilelo p1.d, x11, x8 ; CHECK-NEXT: add x11, x11, x9 ; CHECK-NEXT: b.mi .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d -; 
CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d ; CHECK-NEXT: faddv d0, p0, z2.d ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -111,21 +110,20 @@ exit.block: ; preds = %vector.body define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) { ; CHECK-LABEL: complex_mul_predicated_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 ; CHECK-NEXT: rdvl x11, #2 -; CHECK-NEXT: zip2 z0.d, z1.d, z1.d -; CHECK-NEXT: zip1 z1.d, z1.d, z1.d ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x8, lsl #2] -; CHECK-NEXT: mov z6.d, z1.d -; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: mov z6.d, z0.d +; CHECK-NEXT: mov z7.d, z1.d ; CHECK-NEXT: add x8, x8, x9 ; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0 ; CHECK-NEXT: cmp x10, x8 @@ -141,12 +139,12 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr % ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 -; CHECK-NEXT: mov z0.d, p2/m, z7.d -; CHECK-NEXT: mov z1.d, p1/m, z6.d +; CHECK-NEXT: mov z1.d, p2/m, z7.d +; CHECK-NEXT: mov z0.d, p1/m, z6.d ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d ; CHECK-NEXT: faddv d0, p0, z2.d ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -213,21 +211,20 @@ exit.block: ; preds = %vector.body define 
%"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, ptr %cond) { ; CHECK-LABEL: complex_mul_predicated_x2_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, #100 // =0x64 -; CHECK-NEXT: cntd x9 ; CHECK-NEXT: whilelo p1.d, xzr, x8 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: rdvl x10, #2 -; CHECK-NEXT: cnth x11 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cnth x11 ; CHECK-NEXT: mov x12, x9 -; CHECK-NEXT: zip2 z0.d, z1.d, z1.d -; CHECK-NEXT: zip1 z1.d, z1.d, z1.d ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2] -; CHECK-NEXT: mov z6.d, z1.d -; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: mov z6.d, z0.d +; CHECK-NEXT: mov z7.d, z1.d ; CHECK-NEXT: add x2, x2, x11 ; CHECK-NEXT: and z2.d, z2.d, #0xffffffff ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 @@ -243,14 +240,14 @@ define %"class.std::complex" @complex_mul_predicated_x2_v2f64(ptr %a, ptr %b, pt ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 -; CHECK-NEXT: mov z0.d, p2/m, z7.d -; CHECK-NEXT: mov z1.d, p1/m, z6.d +; CHECK-NEXT: mov z1.d, p2/m, z7.d +; CHECK-NEXT: mov z0.d, p1/m, z6.d ; CHECK-NEXT: whilelo p1.d, x12, x8 ; CHECK-NEXT: add x12, x12, x9 ; CHECK-NEXT: b.mi .LBB2_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d ; CHECK-NEXT: faddv d0, p0, z2.d ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll index 29be231..0646ca4 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll +++ 
b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll @@ -14,15 +14,14 @@ target triple = "aarch64" define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w10, #100 // =0x64 ; CHECK-NEXT: neg x9, x8 +; CHECK-NEXT: mov w10, #100 // =0x64 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and x9, x9, x10 ; CHECK-NEXT: rdvl x10, #2 -; CHECK-NEXT: zip2 z0.d, z1.d, z1.d -; CHECK-NEXT: zip1 z1.d, z1.d, z1.d ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr z2, [x0, #1, mul vl] @@ -32,14 +31,14 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-NEXT: ldr z5, [x1] ; CHECK-NEXT: add x1, x1, x10 ; CHECK-NEXT: add x0, x0, x10 -; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #0 -; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #0 -; CHECK-NEXT: fcmla z1.d, p0/m, z5.d, z3.d, #90 -; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z2.d, #90 +; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0 +; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0 +; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90 +; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #90 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z1.d, z0.d, z1.d ; CHECK-NEXT: faddv d0, p0, z2.d ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -183,17 +182,16 @@ exit.block: ; preds = %vector.body define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64_unrolled: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w10, #1000 // =0x3e8 +; 
CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: neg x9, x8 +; CHECK-NEXT: mov w10, #1000 // =0x3e8 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and x9, x9, x10 ; CHECK-NEXT: rdvl x10, #4 -; CHECK-NEXT: zip2 z0.d, z1.d, z1.d -; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: mov z2.d, z1.d -; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr z4, [x0, #1, mul vl] @@ -207,20 +205,20 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) { ; CHECK-NEXT: ldr z18, [x1, #3, mul vl] ; CHECK-NEXT: ldr z19, [x1, #2, mul vl] ; CHECK-NEXT: add x1, x1, x10 -; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #0 -; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0 +; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #0 +; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #0 ; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #0 ; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #0 -; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z5.d, #90 -; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90 +; CHECK-NEXT: fcmla z0.d, p0/m, z16.d, z5.d, #90 +; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z4.d, #90 ; CHECK-NEXT: fcmla z3.d, p0/m, z18.d, z6.d, #90 ; CHECK-NEXT: fcmla z2.d, p0/m, z19.d, z17.d, #90 ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %exit.block ; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d -; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z5.d, z0.d, z1.d ; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d -; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d ; CHECK-NEXT: fadd z1.d, z4.d, z5.d ; CHECK-NEXT: fadd z2.d, z2.d, z0.d ; CHECK-NEXT: faddv d0, p0, z1.d @@ -310,15 +308,15 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia ; CHECK-LABEL: reduction_mix: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w11, #100 // =0x64 +; 
CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 ; CHECK-NEXT: rdvl x11, #2 -; CHECK-NEXT: zip2 z0.d, z2.d, z2.d -; CHECK-NEXT: zip1 z1.d, z2.d, z2.d ; CHECK-NEXT: .LBB3_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr z3, [x0] @@ -327,13 +325,13 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalia ; CHECK-NEXT: ld1w { z5.d }, p0/z, [x3, x8, lsl #2] ; CHECK-NEXT: add x8, x8, x9 ; CHECK-NEXT: cmp x10, x8 -; CHECK-NEXT: fadd z0.d, z4.d, z0.d -; CHECK-NEXT: fadd z1.d, z3.d, z1.d +; CHECK-NEXT: fadd z1.d, z4.d, z1.d +; CHECK-NEXT: fadd z0.d, z3.d, z0.d ; CHECK-NEXT: add z2.d, z5.d, z2.d ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %middle.block -; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z3.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z1.d, z0.d, z1.d ; CHECK-NEXT: uaddv d2, p0, z2.d ; CHECK-NEXT: faddv d0, p0, z3.d ; CHECK-NEXT: faddv d1, p0, z1.d diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll index a9618fd..05ecc9e 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll @@ -131,18 +131,83 @@ define <4 x i64> @interleave2_v4i64(<2 x i64> %vec0, <2 x i64> %vec1) { ret <4 x i64> %retval } +define <4 x i16> @interleave2_same_const_splat_v4i16() { +; CHECK-SD-LABEL: interleave2_same_const_splat_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: movi v0.4h, #3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: interleave2_same_const_splat_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h +; CHECK-GI-NEXT: ret + %retval = call <4 x i16> 
@llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3)) + ret <4 x i16> %retval +} + +define <4 x i16> @interleave2_diff_const_splat_v4i16() { +; CHECK-SD-LABEL: interleave2_diff_const_splat_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI11_0 +; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI11_0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: interleave2_diff_const_splat_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov w8, #3 // =0x3 +; CHECK-GI-NEXT: mov w9, #4 // =0x4 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov v0.h[1], w8 +; CHECK-GI-NEXT: mov v1.h[1], w9 +; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret + %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 4)) + ret <4 x i16> %retval +} -; Float declarations -declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>) -declare <8 x half> @llvm.vector.interleave2.v8f16(<4 x half>, <4 x half>) -declare <16 x half> @llvm.vector.interleave2.v16f16(<8 x half>, <8 x half>) -declare <4 x float> @llvm.vector.interleave2.v4f32(<2 x float>, <2 x float>) -declare <8 x float> @llvm.vector.interleave2.v8f32(<4 x float>, <4 x float>) -declare <4 x double> @llvm.vector.interleave2.v4f64(<2 x double>, <2 x double>) - -; Integer declarations -declare <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8>, <16 x i8>) -declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>) -declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>) -declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>) +define <4 x i16> @interleave2_same_nonconst_splat_v4i16(i16 %a) { +; CHECK-SD-LABEL: interleave2_same_nonconst_splat_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v0.4h, w0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: interleave2_same_nonconst_splat_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v0.4h, w0 +; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h +; 
CHECK-GI-NEXT: ret + %ins = insertelement <2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> <i32 0, i32 0> + %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat, <2 x i16> %splat) + ret <4 x i16> %retval +} + +define <4 x i16> @interleave2_diff_nonconst_splat_v4i16(i16 %a, i16 %b) { +; CHECK-SD-LABEL: interleave2_diff_nonconst_splat_v4i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov v0.h[1], w0 +; CHECK-SD-NEXT: mov v0.h[2], w1 +; CHECK-SD-NEXT: mov v0.h[3], w1 +; CHECK-SD-NEXT: rev32 v1.4h, v0.4h +; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: interleave2_diff_nonconst_splat_v4i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v0.4h, w0 +; CHECK-GI-NEXT: dup v1.4h, w1 +; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h +; CHECK-GI-NEXT: ret + %ins1 = insertelement <2 x i16> poison, i16 %a, i32 0 + %splat1 = shufflevector <2 x i16> %ins1, <2 x i16> poison, <2 x i32> <i32 0, i32 0> + %ins2 = insertelement <2 x i16> poison, i16 %b, i32 0 + %splat2 = shufflevector <2 x i16> %ins2, <2 x i16> poison, <2 x i32> <i32 0, i32 0> + %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat1, <2 x i16> %splat2) + ret <4 x i16> %retval +} diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll index 52cb2d9..c7fb2db 100644 --- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll +++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll @@ -267,7 +267,7 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal ; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; SME2-NEXT: zip { z0.h - z3.h }, { z0.h - z3.h } ; SME2-NEXT: ret - %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) + %retval = call <vscale x 
32 x i16> @llvm.vector.interleave4.nxv32i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3) ret <vscale x 32 x i16> %retval } @@ -540,30 +540,81 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale ret <vscale x 4 x i32> %retval } -; Float declarations -declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>) -declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>) -declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 4 x float> @llvm.vector.interleave2.nxv4f32(<vscale x 2 x float>, <vscale x 2 x float>) -declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>) +define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() { +; CHECK-LABEL: interleave2_same_const_splat_nxv4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, #3 // =0x3 +; CHECK-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() { +; SVE-LABEL: interleave2_diff_const_splat_nxv4i16: +; SVE: // %bb.0: +; SVE-NEXT: mov z0.d, #4 // =0x4 +; SVE-NEXT: mov z1.d, #3 // =0x3 +; SVE-NEXT: zip2 z2.d, z1.d, z0.d +; SVE-NEXT: zip1 z0.d, z1.d, z0.d +; SVE-NEXT: uzp1 z0.s, z0.s, z2.s +; SVE-NEXT: ret +; +; SME2-LABEL: interleave2_diff_const_splat_nxv4i16: +; SME2: // %bb.0: +; SME2-NEXT: mov z0.d, #4 // =0x4 +; SME2-NEXT: mov z1.d, #3 // =0x3 +; SME2-NEXT: zip { z0.d, z1.d }, z1.d, z0.d +; SME2-NEXT: uzp1 z0.s, z0.s, z1.s +; SME2-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.v4i16(<vscale x 
2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4)) + ret <vscale x 4 x i16> %retval +} -; Integer declarations -declare <vscale x 32 x i8> @llvm.vector.interleave2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>) -declare <vscale x 16 x i16> @llvm.vector.interleave2.nxv16i16(<vscale x 8 x i16>, <vscale x 8 x i16>) -declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>) -declare <vscale x 4 x i64> @llvm.vector.interleave2.nxv4i64(<vscale x 2 x i64>, <vscale x 2 x i64>) +define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) { +; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.s, w0 +; CHECK-NEXT: ret + %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat) + ret <vscale x 4 x i16> %retval +} -; Predicated -declare <vscale x 32 x i1> @llvm.vector.interleave2.nxv32i1(<vscale x 16 x i1>, <vscale x 16 x i1>) -declare <vscale x 16 x i1> @llvm.vector.interleave2.nxv16i1(<vscale x 8 x i1>, <vscale x 8 x i1>) -declare <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>) -declare <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1>, <vscale x 2 x i1>) - -; Illegal type size -declare <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32>, <vscale x 8 x i32>) -declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>, <vscale x 4 x i64>) - -declare <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>) -declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>) -declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>) 
+define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) { +; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; SVE: // %bb.0: +; SVE-NEXT: // kill: def $w1 killed $w1 def $x1 +; SVE-NEXT: // kill: def $w0 killed $w0 def $x0 +; SVE-NEXT: mov z0.d, x0 +; SVE-NEXT: mov z1.d, x1 +; SVE-NEXT: zip2 z2.d, z0.d, z1.d +; SVE-NEXT: zip1 z0.d, z0.d, z1.d +; SVE-NEXT: uzp1 z0.s, z0.s, z2.s +; SVE-NEXT: ret +; +; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; SME2: // %bb.0: +; SME2-NEXT: // kill: def $w1 killed $w1 def $x1 +; SME2-NEXT: // kill: def $w0 killed $w0 def $x0 +; SME2-NEXT: mov z0.d, x0 +; SME2-NEXT: mov z1.d, x1 +; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z1.d +; SME2-NEXT: uzp1 z0.s, z0.s, z1.s +; SME2-NEXT: ret + %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() { +; CHECK-LABEL: interleave4_same_const_splat_nxv8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z0.h, #3 // =0x3 +; CHECK-NEXT: ret + %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 8 x i16> %retval +} diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll index 9306c20..7dcd56c 100644 --- a/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-combine.ll @@ -1,14 +1,14 @@ -; RUN: llc 
-mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s |FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+sve < %s | FileCheck %s -declare i32 @llvm.vscale.i32() -declare i64 @llvm.vscale.i64() +target triple = "aarch64-unknown-linux-gnu" ; Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). define i64 @combine_add_vscale_i64() nounwind { ; CHECK-LABEL: combine_add_vscale_i64: -; CHECK-NOT: add -; CHECK-NEXT: cntd x0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x0 +; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %add = add i64 %vscale, %vscale ret i64 %add @@ -16,9 +16,10 @@ define i64 @combine_add_vscale_i64() nounwind { define i32 @combine_add_vscale_i32() nounwind { ; CHECK-LABEL: combine_add_vscale_i32: -; CHECK-NOT: add -; CHECK-NEXT: cntd x0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: cntd x0 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %vscale = call i32 @llvm.vscale.i32() %add = add i32 %vscale, %vscale ret i32 %add @@ -28,9 +29,9 @@ define i32 @combine_add_vscale_i32() nounwind { ; In this test, C0 = 1, C1 = 32. 
define i64 @combine_mul_vscale_i64() nounwind { ; CHECK-LABEL: combine_mul_vscale_i64: -; CHECK-NOT: mul -; CHECK-NEXT: rdvl x0, #2 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x0, #2 +; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %mul = mul i64 %vscale, 32 ret i64 %mul @@ -38,9 +39,10 @@ define i64 @combine_mul_vscale_i64() nounwind { define i32 @combine_mul_vscale_i32() nounwind { ; CHECK-LABEL: combine_mul_vscale_i32: -; CHECK-NOT: mul -; CHECK-NEXT: rdvl x0, #3 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x0, #3 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %vscale = call i32 @llvm.vscale.i32() %mul = mul i32 %vscale, 48 ret i32 %mul @@ -49,11 +51,11 @@ define i32 @combine_mul_vscale_i32() nounwind { ; Canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) define i64 @combine_sub_vscale_i64(i64 %in) nounwind { ; CHECK-LABEL: combine_sub_vscale_i64: -; CHECK-NOT: sub -; CHECK-NEXT: rdvl x8, #-1 -; CHECK-NEXT: asr x8, x8, #4 -; CHECK-NEXT: add x0, x0, x8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #-1 +; CHECK-NEXT: asr x8, x8, #4 +; CHECK-NEXT: add x0, x0, x8 +; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %sub = sub i64 %in, %vscale ret i64 %sub @@ -61,11 +63,11 @@ define i64 @combine_sub_vscale_i64(i64 %in) nounwind { define i32 @combine_sub_vscale_i32(i32 %in) nounwind { ; CHECK-LABEL: combine_sub_vscale_i32: -; CHECK-NOT: sub -; CHECK-NEXT: rdvl x8, #-1 -; CHECK-NEXT: asr x8, x8, #4 -; CHECK-NEXT: add w0, w0, w8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #-1 +; CHECK-NEXT: asr x8, x8, #4 +; CHECK-NEXT: add w0, w0, w8 +; CHECK-NEXT: ret %vscale = call i32 @llvm.vscale.i32() %sub = sub i32 %in, %vscale ret i32 %sub @@ -75,12 +77,13 @@ define i32 @combine_sub_vscale_i32(i32 %in) nounwind { ; (sub X, (vscale * C)) to (add X, (vscale * -C)) define i64 @multiple_uses_sub_vscale_i64(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: 
multiple_uses_sub_vscale_i64: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: sub x9, x0, x8 -; CHECK-NEXT: add x8, x1, x8 -; CHECK-NEXT: mul x0, x9, x8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: lsr x8, x8, #4 +; CHECK-NEXT: sub x9, x0, x8 +; CHECK-NEXT: add x8, x1, x8 +; CHECK-NEXT: mul x0, x9, x8 +; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %sub = sub i64 %x, %vscale %add = add i64 %y, %vscale @@ -95,9 +98,9 @@ define i64 @multiple_uses_sub_vscale_i64(i64 %x, i64 %y) nounwind { ; Hence, the immediate for RDVL is #1. define i64 @combine_shl_vscale_i64() nounwind { ; CHECK-LABEL: combine_shl_vscale_i64: -; CHECK-NOT: shl -; CHECK-NEXT: rdvl x0, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x0, #1 +; CHECK-NEXT: ret %vscale = call i64 @llvm.vscale.i64() %shl = shl i64 %vscale, 4 ret i64 %shl @@ -105,10 +108,38 @@ define i64 @combine_shl_vscale_i64() nounwind { define i32 @combine_shl_vscale_i32() nounwind { ; CHECK-LABEL: combine_shl_vscale_i32: -; CHECK-NOT: shl -; CHECK-NEXT: rdvl x0, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: rdvl x0, #1 +; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %vscale = call i32 @llvm.vscale.i32() %shl = shl i32 %vscale, 4 ret i32 %shl } + +define i64 @combine_shl_mul_vscale(i64 %a) nounwind { +; CHECK-LABEL: combine_shl_mul_vscale: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: mul x0, x0, x8 +; CHECK-NEXT: ret + %vscale = tail call i64 @llvm.vscale.i64() + %mul = mul i64 %a, %vscale + %shl = shl i64 %mul, 3 + ret i64 %shl +} + +define i64 @combine_shl_mul_vscale_commuted(i64 %a) nounwind { +; CHECK-LABEL: combine_shl_mul_vscale_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: mul x0, x0, x8 +; CHECK-NEXT: ret + %vscale = tail call i64 @llvm.vscale.i64() + %mul = mul i64 %vscale, %a + %shl = shl i64 %mul, 3 + ret i64 %shl +} + +declare i32 @llvm.vscale.i32() +declare i64 
@llvm.vscale.i64() diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir new file mode 100644 index 0000000..7336a54 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir @@ -0,0 +1,97 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s + +--- +name: widget +tracksRegLiveness: true +frameInfo: + adjustsStack: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } + - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill } +machineFunctionInfo: + hasSpilledSGPRs: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + ; GCN-LABEL: name: widget + ; GCN: bb.0: + ; GCN-NEXT: successors: %bb.1(0x80000000) + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 -1 + ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF + ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62 + ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $agpr0 = COPY killed renamable $vgpr62 + ; GCN-NEXT: 
$exec = S_MOV_B64 killed $noreg + ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF + ; GCN-NEXT: dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.1: + ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; GCN-NEXT: liveins: $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.2: + ; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) + ; GCN-NEXT: liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN-NEXT: S_BRANCH %bb.4 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.3: + ; GCN-NEXT: successors: %bb.2(0x80000000) + ; GCN-NEXT: liveins: $agpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: renamable $vgpr62 = COPY renamable $agpr0 + ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg + ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1 + ; GCN-NEXT: S_BRANCH %bb.2 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: bb.4: + ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec + ; GCN-NEXT: $exec = S_MOV_B64 -1 + ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5) + ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5 + ; GCN-NEXT: SI_RETURN + bb.0: + liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15 + + %45:vgpr_32 = IMPLICIT_DEF + SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit 
$exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) + %16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec + + bb.1: + successors: %bb.3, %bb.2 + + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + successors: %bb.4(0x04000000), %bb.1(0x7c000000) + liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99 + + S_CBRANCH_EXECNZ %bb.1, implicit $exec + S_BRANCH %bb.4 + + bb.3: + ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + $sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5) + ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 + S_BRANCH %bb.2 + + bb.4: + SI_RETURN + +... diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll b/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll new file mode 100644 index 0000000..892955c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/inline-asm-out-of-bounds-register.ll @@ -0,0 +1,98 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -filetype=null %s 2>&1 | FileCheck -implicit-check-not=error %s + +; CHECK: error: couldn't allocate output register for constraint '{v256}' +define void @out_of_bounds_vgpr32_def() { + %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v256}"() + ret void +} + +; CHECK: error: couldn't allocate output register for constraint '{v[255:256]}' +define void @out_of_bounds_vgpr64_def_high_tuple() { + %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v[255:256]}"() + ret void +} + +; CHECK: error: couldn't allocate output register for constraint '{v[256:257]}' +define void @out_of_bounds_vgpr64_def_low_tuple() { + %v = tail call i32 asm sideeffect "v_mov_b32 $0, -1", "={v[256:257]}"() + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v256}' +define void 
@out_of_bounds_vgpr32_use() { + %v = tail call i32 asm sideeffect "v_mov_b32 %0, %1", "=v,{v256}"(i32 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[255:256]}' +define void @out_of_bounds_vgpr64_high_tuple() { + tail call void asm sideeffect "; use %0", "{v[255:256]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[256:257]}' +define void @out_of_bounds_vgpr64_low_tuple() { + tail call void asm sideeffect "; use %0", "{v[256:257]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[1:0]}' +define void @vgpr_tuple_swapped() { + tail call void asm sideeffect "; use %0", "{v[1:0]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v4294967295}' +define void @vgpr_uintmax() { + tail call void asm sideeffect "; use %0", "{v4294967295}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v4294967296}' +define void @vgpr_uintmax_p1() { + tail call void asm sideeffect "; use %0", "{v4294967296}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[4294967295:4294967296]}' +define void @vgpr_tuple_uintmax() { + tail call void asm sideeffect "; use %0", "{v[4294967295:4294967296]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[0:4294967295]}' +define void @vgpr_tuple_0_uintmax() { + tail call void asm sideeffect "; use %0", "{v[0:4294967295]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[0:4294967296]}' +define void @vgpr_tuple_0_uintmax_p1() { + tail call void asm sideeffect "; use %0", "{v[0:4294967296]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[4294967264:4294967295]}' +define void @vgpr32_last_is_uintmax() { + tail call void asm sideeffect "; use %0", "{v[4294967264:4294967295]}"(i64 123) + ret void +} + +; CHECK: 
error: couldn't allocate input reg for constraint '{v[4294967265:4294967296]}' +define void @vgpr32_last_is_uintmax_p1() { + tail call void asm sideeffect "; use %0", "{v[4294967265:4294967296]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[2:2147483651]}' +define void @overflow_bitwidth_0() { + tail call void asm sideeffect "; use %0", "{v[2:2147483651]}"(i64 123) + ret void +} + +; CHECK: error: couldn't allocate input reg for constraint '{v[2147483635:2147483651]}' +define void @overflow_bitwidth_1() { + tail call void asm sideeffect "; use %0", "{v[2147483635:2147483651]}"(i64 123) + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll new file mode 100644 index 0000000..cd0b081 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.scalef32.pk8.ll @@ -0,0 +1,403 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-SDAG %s +; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GFX1250-GISEL %s + +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float %scale) +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float %scale) +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float %scale) +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float %scale) +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float %scale) +declare <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float %scale) +declare i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float %scale) +declare i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float %scale) +declare i32 
@llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float %scale) + +define amdgpu_ps void @test_scalef32_pk8_fp8_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_bf16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[8:9], v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_bf16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[8:9], v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp8_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_bf16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_bf16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_bf16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x 
i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.bf16(<8 x bfloat> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_bf16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[8:9], v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_bf16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[8:9], v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_bf16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_bf16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_bf16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b64 
v[0:1], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.bf16(<8 x bfloat> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp8_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[8:9], v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b64 v[8:9], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp8_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f16 v[6:7], v[2:5], 0x42c80000 +; 
GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f16(<8 x half> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[8:9], v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b64 v[6:7], v[8:9], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b64 v[8:9], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f16 v[6:7], 
v[2:5], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[6:7], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f16(<8 x half> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f32_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[12:13], v[0:7], v8 +; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f32_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[0:7], v8 +; GFX1250-GISEL-NEXT: global_store_b64 v[12:13], v[10:11], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_bf8_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_bf8_f32_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[2:9], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_bf8_f32_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 
v[8:9], s[6:7] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[2:9], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.bf8.f32(<8 x float> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp8_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f32_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[12:13], v[0:7], v8 +; GFX1250-SDAG-NEXT: global_store_b64 v[10:11], v[12:13], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f32_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[0:7], v8 +; GFX1250-GISEL-NEXT: global_store_b64 v[12:13], v[10:11], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float %scale) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp8_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp8_f32_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 
+; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[2:9], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b64 v[0:1], v[10:11], off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp8_f32_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[2:9], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b64 v[0:1], v[10:11], off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call <2 x i32> @llvm.amdgcn.cvt.scalef32.pk8.fp8.f32(<8 x float> %src, float 100.0) + store <2 x i32> %cvt, ptr addrspace(1) %out, align 8 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp4_f32_vv(<8 x float> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f32_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v11, v10 :: v_dual_mov_b32 v10, v9 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f32 v9, v[0:7], v8 +; GFX1250-SDAG-NEXT: global_store_b32 v[10:11], v9, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f32_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v10 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f32 v9, v[0:7], v8 +; GFX1250-GISEL-NEXT: global_store_b32 v[12:13], v9, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float %scale) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp4_f32_sl(<8 x float> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f32_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; 
GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v6, s4 :: v_dual_mov_b32 v7, s5 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v8, s6 :: v_dual_mov_b32 v9, s7 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f32 v10, v[2:9], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v10, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f32_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[6:7] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f32 v10, v[2:9], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v10, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f32(<8 x float> %src, float 100.0) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp4_f16_vv(<8 x half> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f16 v5, v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v5, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v9, v6 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f16 v5, v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b32 v[8:9], v5, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float %scale) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_ps void 
@test_scalef32_pk8_fp4_f16_sl(<8 x half> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_f16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_f16 v6, v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_f16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1250-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_f16 v6, v[2:5], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.f16(<8 x half> %src, float 100.0) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_ps void @test_scalef32_pk8_fp4_bf16_vv(<8 x bfloat> %src, float %scale, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_bf16_vv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v5, v[0:3], v4 +; GFX1250-SDAG-NEXT: global_store_b32 v[6:7], v5, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_bf16_vv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v6, v5 +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v5, v[0:3], v4 +; GFX1250-GISEL-NEXT: global_store_b32 v[6:7], v5, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float %scale) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_ps void 
@test_scalef32_pk8_fp4_bf16_sl(<8 x bfloat> inreg %src, ptr addrspace(1) %out) { +; GFX1250-SDAG-LABEL: test_scalef32_pk8_fp4_bf16_sl: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-SDAG-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v6, v[2:5], 0x42c80000 +; GFX1250-SDAG-NEXT: global_store_b32 v[0:1], v6, off +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: test_scalef32_pk8_fp4_bf16_sl: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_cvt_scalef32_pk8_fp4_bf16 v6, v[2:5], 0x42c80000 +; GFX1250-GISEL-NEXT: global_store_b32 v[0:1], v6, off +; GFX1250-GISEL-NEXT: s_endpgm + %cvt = tail call i32 @llvm.amdgcn.cvt.scalef32.pk8.fp4.bf16(<8 x bfloat> %src, float 100.0) + store i32 %cvt, ptr addrspace(1) %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll new file mode 100644 index 0000000..4f7bbf8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.gfx1250.ll @@ -0,0 +1,416 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-SDAG %s +; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-GISEL %s + +define amdgpu_kernel void @v_permlane_bcast_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { +; GFX1250-LABEL: v_permlane_bcast_b32_vss: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: 
s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s3, s4 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %src1, i32 %src2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_bcast_b32_vii(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_bcast_b32_vii: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, 1, 2 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 1, i32 2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_bcast_b32_vll(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_bcast_b32_vll: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_movk_i32 s2, 0x64 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_permlane_bcast_b32 v0, v0, s2, 0x66 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 100, i32 102) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_bcast_b32_vvv(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-SDAG-LABEL: v_permlane_bcast_b32_vvv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0 
+; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_permlane_bcast_b32 v1, v1, s3, s2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: v_permlane_bcast_b32_vvv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_permlane_bcast_b32 v0, v0, s3, s4 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.permlane.bcast(i32 %src0, i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_down_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { +; GFX1250-LABEL: v_permlane_down_b32_vss: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu 
instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s3, s4 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %src1, i32 %src2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_down_b32_vii(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_down_b32_vii: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_down_b32 v0, v0, 1, 2 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 1, i32 2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_down_b32_vll(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_down_b32_vll: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_movk_i32 s2, 0x64 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_permlane_down_b32 v0, v0, s2, 0x66 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 100, i32 102) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_down_b32_vvv(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-SDAG-LABEL: v_permlane_down_b32_vvv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1 +; 
GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_permlane_down_b32 v1, v1, s3, s2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: v_permlane_down_b32_vvv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_permlane_down_b32 v0, v0, s3, s4 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.permlane.down(i32 %src0, i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_up_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { +; GFX1250-LABEL: v_permlane_up_b32_vss: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s3, s4 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 
%src1, i32 %src2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_up_b32_vii(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_up_b32_vii: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_up_b32 v0, v0, 1, 2 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 1, i32 2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_up_b32_vll(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_up_b32_vll: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_movk_i32 s2, 0x64 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_permlane_up_b32 v0, v0, s2, 0x66 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 100, i32 102) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_up_b32_vvv(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-SDAG-LABEL: v_permlane_up_b32_vvv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; 
GFX1250-SDAG-NEXT: v_permlane_up_b32 v1, v1, s3, s2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: v_permlane_up_b32_vvv: +; GFX1250-GISEL: ; %bb.0: +; GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_permlane_up_b32 v0, v0, s3, s4 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.permlane.up(i32 %src0, i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_xor_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) { +; GFX1250-LABEL: v_permlane_xor_b32_vss: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b32 s4, s[4:5], 0x34 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s3, s4 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %src1, i32 %src2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_xor_b32_vii(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_xor_b32_vii: +; GFX1250: ; %bb.0: +; 
GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, 1, 2 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 1, i32 2) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_xor_b32_vll(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_xor_b32_vll: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_movk_i32 s2, 0x64 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: v_permlane_xor_b32 v0, v0, s2, 0x66 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 100, i32 102) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_xor_b32_vvv(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-SDAG-LABEL: v_permlane_xor_b32_vvv: +; GFX1250-SDAG: ; %bb.0: +; GFX1250-SDAG-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-SDAG-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-SDAG-NEXT: s_wait_kmcnt 0x0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-SDAG-NEXT: v_readfirstlane_b32 s2, v0 +; GFX1250-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-SDAG-NEXT: v_permlane_xor_b32 v1, v1, s3, s2 +; GFX1250-SDAG-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-SDAG-NEXT: s_endpgm +; +; GFX1250-GISEL-LABEL: v_permlane_xor_b32_vvv: +; GFX1250-GISEL: ; %bb.0: +; 
GFX1250-GISEL-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, 0x3ff, v0 +; GFX1250-GISEL-NEXT: v_bfe_u32 v0, v0, 10, 10 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s3, v1 +; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 +; GFX1250-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1250-GISEL-NEXT: s_wait_kmcnt 0x0 +; GFX1250-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-GISEL-NEXT: v_permlane_xor_b32 v0, v0, s3, s4 +; GFX1250-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-GISEL-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.permlane.xor(i32 %src0, i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_idx_gen_b32_vs(ptr addrspace(1) %out, i32 %src0, i32 %src1) { +; GFX1250-LABEL: v_permlane_idx_gen_b32_vs: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, s3 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 %src1) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_idx_gen_b32_vi(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_idx_gen_b32_vi: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, 1 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: 
s_endpgm + %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 1) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_idx_gen_b32_vl(ptr addrspace(1) %out, i32 %src0) { +; GFX1250-LABEL: v_permlane_idx_gen_b32_vl: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, 0x64 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %src0, i32 100) + store i32 %v, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @v_permlane_idx_gen_b32_vv(ptr addrspace(1) %out) { +; GFX1250-LABEL: v_permlane_idx_gen_b32_vv: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_bfe_u32 v1, v0, 10, 10 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1250-NEXT: v_readfirstlane_b32 s2, v1 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: v_permlane_idx_gen_b32 v0, v0, s2 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm + %tidx = call i32 @llvm.amdgcn.workitem.id.x() + %tidy = call i32 @llvm.amdgcn.workitem.id.y() + %v = call i32 @llvm.amdgcn.permlane.idx.gen(i32 %tidx, i32 %tidy) + store i32 %v, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll index 22ebb55..702a69f 100644 --- a/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll +++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch-data.ll @@ -400,9 +400,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; 
GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-NEXT: s_wait_alu 0xf1ff @@ -438,9 +438,9 @@ define amdgpu_kernel void @copy_flat_divergent(ptr nocapture %d, ptr nocapture r ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0 -; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff @@ -531,9 +531,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr addrspace(1) nocapture %d, ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-NEXT: s_wait_alu 0xf1ff @@ -569,9 +569,9 @@ define amdgpu_kernel void @copy_global_divergent(ptr 
addrspace(1) nocapture %d, ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-SPREFETCH-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX12-SPREFETCH-NEXT: s_wait_kmcnt 0x0 -; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, s6, v0 +; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, s1, v0, s6 ; GFX12-SPREFETCH-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, s7, 0, s1 +; GFX12-SPREFETCH-NEXT: v_add_co_ci_u32_e64 v3, null, 0, s7, s1 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v0, s1, s4, v0 ; GFX12-SPREFETCH-NEXT: v_add_co_u32 v2, vcc_lo, 0xb0, v2 ; GFX12-SPREFETCH-NEXT: s_wait_alu 0xf1ff diff --git a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll index 05a0b1a..836e88c 100644 --- a/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll +++ b/llvm/test/CodeGen/AMDGPU/mad_u64_u32.ll @@ -35,13 +35,6 @@ define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @mad_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) { -; GCN-LABEL: mad_i32_sss: -; GCN: ; %bb.0: -; GCN-NEXT: s_mul_i32 s0, s0, s1 -; GCN-NEXT: s_add_i32 s0, s0, s2 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: ; return to shader part epilog -; ; GFX9-LABEL: mad_i32_sss: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mul_i32 s0, s0, s1 diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll index be02045..4c0ab91 100644 --- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll +++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll @@ -6982,7 +6982,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 ; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0x800 ; CHECK-NEXT: s_cbranch_scc1 .LBB6_2 -; CHECK-NEXT: .LBB6_3: ; %Flow9 +; CHECK-NEXT: .LBB6_3: ; %Flow7 ; CHECK-NEXT: s_andn2_saveexec_b32 s8, s6 ; CHECK-NEXT: s_cbranch_execz .LBB6_6 
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader @@ -7048,7 +7048,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; CHECK-NEXT: global_store_dwordx4 v[100:101], v[96:99], off offset:16 ; CHECK-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] ; CHECK-NEXT: s_cbranch_scc0 .LBB6_5 -; CHECK-NEXT: .LBB6_6: ; %Flow10 +; CHECK-NEXT: .LBB6_6: ; %Flow8 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_setpc_b64 s[30:31] ; @@ -7689,7 +7689,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 ; ALIGNED-NEXT: s_cbranch_scc1 .LBB6_2 -; ALIGNED-NEXT: .LBB6_3: ; %Flow9 +; ALIGNED-NEXT: .LBB6_3: ; %Flow7 ; ALIGNED-NEXT: s_andn2_saveexec_b32 s8, s6 ; ALIGNED-NEXT: s_cbranch_execz .LBB6_6 ; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader @@ -8316,7 +8316,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; ALIGNED-NEXT: global_store_byte v[16:17], v11, off offset:3 ; ALIGNED-NEXT: global_store_byte v[16:17], v4, off offset:1 ; ALIGNED-NEXT: s_cbranch_scc0 .LBB6_5 -; ALIGNED-NEXT: .LBB6_6: ; %Flow10 +; ALIGNED-NEXT: .LBB6_6: ; %Flow8 ; ALIGNED-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; ALIGNED-NEXT: s_clause 0x7 ; ALIGNED-NEXT: buffer_load_dword v47, off, s[0:3], s32 @@ -8369,7 +8369,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; UNROLL3-NEXT: global_store_dwordx4 v[0:1], v[2:5], off offset:2032 ; UNROLL3-NEXT: ; implicit-def: $vgpr2_vgpr3 ; UNROLL3-NEXT: ; implicit-def: $vgpr0_vgpr1 -; UNROLL3-NEXT: .LBB6_4: ; %Flow7 +; UNROLL3-NEXT: .LBB6_4: ; %Flow5 ; UNROLL3-NEXT: s_andn2_saveexec_b32 s8, s6 ; UNROLL3-NEXT: s_cbranch_execz .LBB6_7 ; UNROLL3-NEXT: ; %bb.5: ; %memmove_bwd_residual @@ -8403,7 +8403,7 @@ define void @memmove_p1_p1_sz2048(ptr addrspace(1) align 1 %dst, ptr addrspace(1 ; UNROLL3-NEXT: global_store_dwordx4 
v[16:17], v[12:15], off offset:32 ; UNROLL3-NEXT: s_cmp_eq_u64 s[4:5], s[6:7] ; UNROLL3-NEXT: s_cbranch_scc0 .LBB6_6 -; UNROLL3-NEXT: .LBB6_7: ; %Flow8 +; UNROLL3-NEXT: .LBB6_7: ; %Flow6 ; UNROLL3-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; UNROLL3-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll index 272daa9..dd5c247 100644 --- a/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll +++ b/llvm/test/CodeGen/AMDGPU/memmove-var-size.ll @@ -460,10 +460,10 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6 ; CHECK-NEXT: s_cbranch_execnz .LBB3_3 -; CHECK-NEXT: ; %bb.1: ; %Flow34 +; CHECK-NEXT: ; %bb.1: ; %Flow36 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB3_10 -; CHECK-NEXT: .LBB3_2: ; %Flow35 +; CHECK-NEXT: .LBB3_2: ; %Flow37 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -494,7 +494,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB3_5 -; CHECK-NEXT: .LBB3_6: ; %Flow29 +; CHECK-NEXT: .LBB3_6: ; %Flow31 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4 ; CHECK-NEXT: s_cbranch_execz .LBB3_9 @@ -520,7 +520,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB3_8 -; CHECK-NEXT: .LBB3_9: ; %Flow27 +; CHECK-NEXT: .LBB3_9: ; %Flow29 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 ; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 @@ -556,7 +556,7 @@ define void @memmove_p0_p4(ptr 
addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_cbranch_execnz .LBB3_12 -; CHECK-NEXT: .LBB3_13: ; %Flow33 +; CHECK-NEXT: .LBB3_13: ; %Flow35 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB3_16 @@ -584,7 +584,7 @@ define void @memmove_p0_p4(ptr addrspace(0) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11] ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB3_15 -; CHECK-NEXT: .LBB3_16: ; %Flow31 +; CHECK-NEXT: .LBB3_16: ; %Flow33 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) @@ -907,10 +907,10 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_cmpx_ge_u64_e64 v[2:3], v[0:1] ; CHECK-NEXT: s_xor_b32 s7, exec_lo, s6 ; CHECK-NEXT: s_cbranch_execnz .LBB6_3 -; CHECK-NEXT: ; %bb.1: ; %Flow41 +; CHECK-NEXT: ; %bb.1: ; %Flow39 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB6_10 -; CHECK-NEXT: .LBB6_2: ; %Flow42 +; CHECK-NEXT: .LBB6_2: ; %Flow40 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .LBB6_3: ; %memmove_copy_forward @@ -940,7 +940,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v11, null, 0, v11, s6 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB6_5 -; CHECK-NEXT: .LBB6_6: ; %Flow36 +; CHECK-NEXT: .LBB6_6: ; %Flow34 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_and_saveexec_b32 s8, s4 ; CHECK-NEXT: s_cbranch_execz .LBB6_9 @@ -966,11 +966,11 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, 0, v1, s6 ; CHECK-NEXT: 
s_andn2_b32 exec_lo, exec_lo, s9 ; CHECK-NEXT: s_cbranch_execnz .LBB6_8 -; CHECK-NEXT: .LBB6_9: ; %Flow34 +; CHECK-NEXT: .LBB6_9: ; %Flow32 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 -; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 @@ -1002,15 +1002,15 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: v_add_co_ci_u32_e64 v5, null, -1, v5, s5 ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: s_cbranch_execnz .LBB6_12 -; CHECK-NEXT: .LBB6_13: ; %Flow40 +; CHECK-NEXT: .LBB6_13: ; %Flow38 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB6_16 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader -; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: s_mov_b32 s7, 0 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB6_15: ; %memmove_bwd_main_loop @@ -1030,7 +1030,7 @@ define void @memmove_p1_p1(ptr addrspace(1) align 1 %dst, ptr addrspace(1) align ; CHECK-NEXT: global_store_dwordx4 v[12:13], v[8:11], off ; CHECK-NEXT: s_andn2_b32 exec_lo, exec_lo, s7 ; CHECK-NEXT: s_cbranch_execnz .LBB6_15 -; CHECK-NEXT: .LBB6_16: ; %Flow38 +; CHECK-NEXT: .LBB6_16: ; %Flow36 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s6 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -1181,8 +1181,8 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: .LBB8_9: ; %Flow31 ; 
CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 -; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 ; CHECK-NEXT: ; implicit-def: $vgpr8_vgpr9 ; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 ; CHECK-NEXT: s_andn2_saveexec_b32 s6, s7 @@ -1219,10 +1219,10 @@ define void @memmove_p1_p4(ptr addrspace(1) align 1 %dst, ptr addrspace(4) align ; CHECK-NEXT: s_and_saveexec_b32 s5, vcc_lo ; CHECK-NEXT: s_cbranch_execz .LBB8_16 ; CHECK-NEXT: ; %bb.14: ; %memmove_bwd_main_loop.preheader -; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 -; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, -16 ; CHECK-NEXT: v_add_co_ci_u32_e64 v1, null, -1, v1, vcc_lo +; CHECK-NEXT: v_add_co_u32 v2, vcc_lo, v2, -16 +; CHECK-NEXT: v_add_co_ci_u32_e64 v3, null, -1, v3, vcc_lo ; CHECK-NEXT: s_mov_b32 s7, 0 ; CHECK-NEXT: .p2align 6 ; CHECK-NEXT: .LBB8_15: ; %memmove_bwd_main_loop diff --git a/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll b/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll new file mode 100644 index 0000000..c4a48a46 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/postra-sched-attribute.ll @@ -0,0 +1,34 @@ +; REQUIRES: asserts + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -debug-only=gcn-subtarget < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s 2>&1 | FileCheck -check-prefixes=WARNING %s + +; CHECK: Post-MI-sched direction (postra-sched-topdown): topdown +define float @postra-sched-topdown(float %input) nounwind #0 { + %x = fadd float %input, 1.000000e+00 + ret float %x +} + +; CHECK: Post-MI-sched direction (postra-sched-bottomup): bottomup +define float @postra-sched-bottomup(float %input) nounwind #1 { + %x = fsub float %input, 1.000000e+00 + ret float %x +} + +; CHECK: Post-MI-sched direction (postra-sched-bidirectional): bidirectional +define float 
@postra-sched-bidirectional(float %input) nounwind #2 { + %x = fadd float %input, 1.000000e+00 + ret float %x +} + +; CHECK: Post-MI-sched direction (postra-sched-warning): topdown +; WARNING: invalid value for postRA direction attribute +define float @postra-sched-warning(float %input) nounwind #3 { + %x = fsub float %input, 1.000000e+00 + ret float %x +} + +attributes #0 = {"amdgpu-post-ra-direction"="topdown"} +attributes #1 = {"amdgpu-post-ra-direction"="bottomup"} +attributes #2 = {"amdgpu-post-ra-direction"="bidirectional"} +attributes #3 = {"amdgpu-post-ra-direction"="warning"} diff --git a/llvm/test/CodeGen/AMDGPU/udivrem24.ll b/llvm/test/CodeGen/AMDGPU/udivrem24.ll index 5477d62..1e5ec59 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem24.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem24.ll @@ -1,18 +1,103 @@ -; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=VI %s +; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG %s -; FUNC-LABEL: {{^}}udiv24_i8: -; SI: v_cvt_f32_ubyte -; SI-DAG: v_cvt_f32_ubyte -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: udiv24_i8: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt 
lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i8: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i8: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1 +; 
EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.X, T0.X, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, PS, PV.W, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1 %num = load i8, ptr addrspace(1) %in %den = load i8, ptr addrspace(1) %den_ptr @@ -21,17 +106,101 @@ define amdgpu_kernel void @udiv24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in ret void } -; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in_out: -; SI: v_cvt_f32_ubyte -; SI-DAG: v_cvt_f32_ubyte -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { +; SI-LABEL: udiv24_i8_denorm_flush_in_out: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; 
SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i8_denorm_flush_in_out: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i8_denorm_flush_in_out: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1 +; 
EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.X, T0.X, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, PS, PV.W, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1 %num = load i8, ptr addrspace(1) %in %den = load i8, ptr addrspace(1) %den_ptr @@ -40,17 +209,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in_out(ptr addrspace(1) %out, ret void } -; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_in: -; SI: v_cvt_f32_ubyte -; SI-DAG: v_cvt_f32_ubyte -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { +; SI-LABEL: udiv24_i8_denorm_flush_in: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 
s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i8_denorm_flush_in: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i8_denorm_flush_in: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, 
#1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.X, T0.X, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, PS, PV.W, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1 %num = load i8, ptr addrspace(1) %in %den = load i8, ptr addrspace(1) %den_ptr @@ -59,17 +312,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_in(ptr addrspace(1) %out, ptr ret void } -; FUNC-LABEL: {{^}}udiv24_i8_denorm_flush_out: -; SI: v_cvt_f32_ubyte -; SI-DAG: v_cvt_f32_ubyte -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 { +; SI-LABEL: udiv24_i8_denorm_flush_out: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: 
buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i8_denorm_flush_out: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i8_denorm_flush_out: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: 
ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.X, T0.X, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, PS, PV.W, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1 %num = load i8, ptr addrspace(1) %in %den = load i8, ptr addrspace(1) %den_ptr @@ -78,17 +415,101 @@ define amdgpu_kernel void @udiv24_i8_denorm_flush_out(ptr addrspace(1) %out, ptr ret void } -; FUNC-LABEL: {{^}}udiv24_i16: -; SI: v_cvt_f32_u32 -; SI: v_cvt_f32_u32 -; SI: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv24_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: udiv24_i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 
offset:2 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_u32_e32 v0, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_u32_e32 v1, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 +; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_u32_e32 v0, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_u32_e32 v1, v1 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 23, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 +; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * 
T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.X, T0.X, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: ADD_INT * T1.W, PS, PV.W, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1 %num = load i16, ptr addrspace(1) %in, align 2 %den = load i16, ptr addrspace(1) %den_ptr, align 2 @@ -97,17 +518,85 @@ define amdgpu_kernel void @udiv24_i16(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}udiv23_i32: -; SI: v_cvt_f32_u32 -; SI-DAG: v_cvt_f32_u32 -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @udiv23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: udiv23_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0x7fffff +; SI-NEXT: s_and_b32 s5, s5, 0x7fffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; 
SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv23_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv23_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W, +; EG-NEXT: AND_INT T0.W, T0.X, literal.x, +; EG-NEXT: RECIP_IEEE * T0.X, PS, +; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z, +; EG-NEXT: TRUNC * 
T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: AND_INT T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -120,11 +609,88 @@ define amdgpu_kernel void @udiv23_i32(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}udiv24_i32: -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @udiv24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: udiv24_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0xffffff +; SI-NEXT: s_and_b32 s5, s5, 0xffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv24_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 
0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv24_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -137,11 +703,88 @@ define amdgpu_kernel void @udiv24_i32(ptr 
addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}no_udiv24_u23_u24_i32: -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @no_udiv24_u23_u24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: no_udiv24_u23_u24_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0x7fffff +; SI-NEXT: s_and_b32 s5, s5, 0xffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: no_udiv24_u23_u24_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: no_udiv24_u23_u24_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: 
TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -154,11 +797,88 @@ define amdgpu_kernel void @no_udiv24_u23_u24_i32(ptr addrspace(1) %out, ptr addr ret void } -; FUNC-LABEL: {{^}}no_udiv24_u24_u23_i32: -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @no_udiv24_u24_u23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: no_udiv24_u24_u23_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; 
SI-NEXT: s_and_b32 s4, s4, 0xffffff +; SI-NEXT: s_and_b32 s5, s5, 0x7fffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: no_udiv24_u24_u23_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: no_udiv24_u24_u23_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * 
T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -171,14 +891,113 @@ define amdgpu_kernel void @no_udiv24_u24_u23_i32(ptr addrspace(1) %out, ptr addr ret void } -; FUNC-LABEL: {{^}}udiv25_i32: ; RCP_IFLAG is for URECIP in the full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @udiv25_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: udiv25_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0x1ffffff +; SI-NEXT: s_and_b32 s5, s5, 0x1ffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s5 +; SI-NEXT: s_sub_i32 s6, 0, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s6, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s4, v0 +; SI-NEXT: v_readfirstlane_b32 s6, v0 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 
+; SI-NEXT: s_mul_i32 s6, s6, s5 +; SI-NEXT: s_sub_i32 s4, s4, s6 +; SI-NEXT: s_sub_i32 s6, s4, s5 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_cselect_b32 s4, s6, s4 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: udiv25_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; VI-NEXT: v_readfirstlane_b32 s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: udiv25_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting 
at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -191,14 +1010,113 @@ define amdgpu_kernel void @udiv25_i32(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}test_no_udiv24_i32_1: ; RCP_IFLAG is for URECIP in the full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @test_no_udiv24_i32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: test_no_udiv24_i32_1: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0xffffff +; SI-NEXT: s_and_b32 s5, s5, 0x1ffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, 
s5 +; SI-NEXT: s_sub_i32 s6, 0, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s6, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s4, v0 +; SI-NEXT: v_readfirstlane_b32 s6, v0 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 +; SI-NEXT: s_mul_i32 s6, s6, s5 +; SI-NEXT: s_sub_i32 s4, s4, s6 +; SI-NEXT: s_sub_i32 s6, s4, s5 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_cselect_b32 s4, s6, s4 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_no_udiv24_i32_1: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0xffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; VI-NEXT: v_readfirstlane_b32 s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 
0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_no_udiv24_i32_1: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -211,14 +1129,113 @@ define amdgpu_kernel void @test_no_udiv24_i32_1(ptr addrspace(1) %out, ptr addrs ret void } -; FUNC-LABEL: {{^}}test_no_udiv24_i32_2: ; RCP_IFLAG is for URECIP in the full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @test_no_udiv24_i32_2(ptr addrspace(1) %out, ptr 
addrspace(1) %in) { +; SI-LABEL: test_no_udiv24_i32_2: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0x1ffffff +; SI-NEXT: s_and_b32 s5, s5, 0xffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s5 +; SI-NEXT: s_sub_i32 s6, 0, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s6, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s4, v0 +; SI-NEXT: v_readfirstlane_b32 s6, v0 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 +; SI-NEXT: s_mul_i32 s6, s6, s5 +; SI-NEXT: s_sub_i32 s4, s4, s6 +; SI-NEXT: s_sub_i32 s6, s4, s5 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: s_cselect_b32 s4, s6, s4 +; SI-NEXT: v_add_i32_e32 v1, vcc, 1, v0 +; SI-NEXT: s_cmp_ge_u32 s4, s5 +; SI-NEXT: s_cselect_b64 vcc, -1, 0 +; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_no_udiv24_i32_2: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; VI-NEXT: v_readfirstlane_b32 
s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: v_add_u32_e32 v1, vcc, 1, v0 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b64 vcc, -1, 0 +; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_no_udiv24_i32_2: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 21, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: ADD_INT T0.Z, T0.X, 1, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T1.W, PV.W, T1.W, PS, +; EG-NEXT: CNDE_INT * T2.W, PV.W, T0.X, PV.Z, +; EG-NEXT: ADD_INT T3.W, PS, 1, +; EG-NEXT: SETGE_UINT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PS, T2.W, PV.W, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load 
i32, ptr addrspace(1) %den_ptr, align 4 @@ -231,17 +1248,107 @@ define amdgpu_kernel void @test_no_udiv24_i32_2(ptr addrspace(1) %out, ptr addrs ret void } -; FUNC-LABEL: {{^}}urem24_i8: -; SI: v_cvt_f32_ubyte -; SI-DAG: v_cvt_f32_ubyte -; SI-DAG: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @urem24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: urem24_i8: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 offset:1 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_ubyte0_e32 v3, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v4, v3 +; SI-NEXT: v_mul_f32_e32 v4, v2, v4 +; SI-NEXT: v_trunc_f32_e32 v4, v4 +; SI-NEXT: v_fma_f32 v2, -v4, v3, v2 +; SI-NEXT: v_cvt_u32_f32_e32 v4, v4 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v3 +; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc +; SI-NEXT: v_mul_lo_u32 v1, v2, v1 +; SI-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0 +; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: urem24_i8: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ubyte v0, off, s[8:11], 0 offset:1 +; VI-NEXT: buffer_load_ubyte v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; 
VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 +; VI-NEXT: v_rcp_iflag_f32_e32 v3, v2 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_ubyte0_e32 v4, v1 +; VI-NEXT: v_mul_f32_e32 v3, v4, v3 +; VI-NEXT: v_trunc_f32_e32 v3, v3 +; VI-NEXT: v_cvt_u32_f32_e32 v5, v3 +; VI-NEXT: v_mad_f32 v3, -v3, v2, v4 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2 +; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; VI-NEXT: v_mul_lo_u32 v0, v2, v0 +; VI-NEXT: v_subrev_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: urem24_i8: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 25, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_8 T1.X, T0.X, 1, #1 +; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.W, T0.X, +; EG-NEXT: MUL_IEEE * T1.W, PS, T0.Z, +; EG-NEXT: TRUNC * T1.W, PV.W, +; EG-NEXT: MULADD_IEEE T0.W, -PV.W, T0.Y, T0.W, +; EG-NEXT: TRUNC * T1.W, PV.W, +; EG-NEXT: SETGE * T0.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T0.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.X, +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: SUB_INT * T1.W, T0.X, PS, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; 
EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i8, ptr addrspace(1) %in, i8 1 %num = load i8, ptr addrspace(1) %in %den = load i8, ptr addrspace(1) %den_ptr @@ -250,17 +1357,107 @@ define amdgpu_kernel void @urem24_i8(ptr addrspace(1) %out, ptr addrspace(1) %in ret void } -; FUNC-LABEL: {{^}}urem24_i16: -; SI: v_cvt_f32_u32 -; SI: v_cvt_f32_u32 -; SI: v_rcp_iflag_f32 -; SI: v_cvt_u32_f32 - -; EG: UINT_TO_FLT -; EG-DAG: UINT_TO_FLT -; EG-DAG: RECIP_IEEE -; EG: FLT_TO_UINT define amdgpu_kernel void @urem24_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: urem24_i16: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_mov_b32 s10, s6 +; SI-NEXT: s_mov_b32 s11, s7 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s8, s2 +; SI-NEXT: s_mov_b32 s9, s3 +; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 +; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2 +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: s_waitcnt vmcnt(1) +; SI-NEXT: v_cvt_f32_u32_e32 v2, v0 +; SI-NEXT: s_waitcnt vmcnt(0) +; SI-NEXT: v_cvt_f32_u32_e32 v3, v1 +; SI-NEXT: v_rcp_iflag_f32_e32 v4, v3 +; SI-NEXT: v_mul_f32_e32 v4, v2, v4 +; SI-NEXT: v_trunc_f32_e32 v4, v4 +; SI-NEXT: v_fma_f32 v2, -v4, v3, v2 +; SI-NEXT: v_cvt_u32_f32_e32 v4, v4 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v2|, v3 +; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc +; SI-NEXT: v_mul_lo_u32 v1, v2, v1 +; SI-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0 +; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: urem24_i16: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_mov_b32 s7, 0xf000 +; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: s_mov_b32 s10, s6 +; VI-NEXT: s_mov_b32 s11, s7 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_mov_b32 s8, s2 +; VI-NEXT: s_mov_b32 s9, s3 +; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2 +; VI-NEXT: 
buffer_load_ushort v1, off, s[8:11], 0 +; VI-NEXT: s_mov_b32 s4, s0 +; VI-NEXT: s_mov_b32 s5, s1 +; VI-NEXT: s_waitcnt vmcnt(1) +; VI-NEXT: v_cvt_f32_u32_e32 v2, v0 +; VI-NEXT: s_waitcnt vmcnt(0) +; VI-NEXT: v_cvt_f32_u32_e32 v3, v1 +; VI-NEXT: v_rcp_iflag_f32_e32 v4, v2 +; VI-NEXT: v_mul_f32_e32 v4, v3, v4 +; VI-NEXT: v_trunc_f32_e32 v4, v4 +; VI-NEXT: v_cvt_u32_f32_e32 v5, v4 +; VI-NEXT: v_mad_f32 v3, -v4, v2, v3 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, v2 +; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc +; VI-NEXT: v_mul_lo_u32 v0, v2, v0 +; VI-NEXT: v_subrev_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: urem24_i16: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 1 @6 +; EG-NEXT: ALU 25, @11, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 +; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 10: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 11: +; EG-NEXT: UINT_TO_FLT * T0.Y, T1.X, +; EG-NEXT: RECIP_IEEE * T0.Z, PS, +; EG-NEXT: UINT_TO_FLT * T0.W, T0.X, +; EG-NEXT: MUL_IEEE * T1.W, PS, T0.Z, +; EG-NEXT: TRUNC * T1.W, PV.W, +; EG-NEXT: MULADD_IEEE T0.W, -PV.W, T0.Y, T0.W, +; EG-NEXT: TRUNC * T1.W, PV.W, +; EG-NEXT: SETGE * T0.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T0.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.Y, T1.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.X, +; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, +; EG-NEXT: SUB_INT * T1.W, T0.X, PS, +; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) +; EG-NEXT: AND_INT T1.W, PS, literal.x, +; EG-NEXT: LSHL * T0.W, PV.W, literal.y, +; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) +; EG-NEXT: LSHL T0.X, PV.W, PS, +; EG-NEXT: LSHL * T0.W, literal.x, PS, +; EG-NEXT: 
65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: MOV T0.Y, 0.0, +; EG-NEXT: MOV * T0.Z, 0.0, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i16, ptr addrspace(1) %in, i16 1 %num = load i16, ptr addrspace(1) %in, align 2 %den = load i16, ptr addrspace(1) %den_ptr, align 2 @@ -269,10 +1466,90 @@ define amdgpu_kernel void @urem24_i16(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}urem24_i32: -; SI-NOT: v_rcp_f32 -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @urem24_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: urem24_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s6, s4, 0xffffff +; SI-NEXT: s_and_b32 s7, s5, 0xffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s6 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s7 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_mul_lo_u32 v0, v0, s5 +; SI-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; SI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: urem24_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s2, s5, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s2 +; VI-NEXT: s_and_b32 s2, s4, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; 
VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_mul_lo_u32 v0, v0, s5 +; VI-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; VI-NEXT: v_and_b32_e32 v0, 0xffffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: urem24_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.X, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -285,14 +1562,105 @@ define amdgpu_kernel void @urem24_i32(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}urem25_i32: ; RCP_IFLAG is for URECIP in the 
full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @urem25_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: urem25_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s2, s4, 0x1ffffff +; SI-NEXT: s_and_b32 s4, s5, 0x1ffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: s_sub_i32 s5, 0, s4 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s5, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s2, v0 +; SI-NEXT: v_readfirstlane_b32 s5, v0 +; SI-NEXT: s_mul_i32 s5, s5, s4 +; SI-NEXT: s_sub_i32 s2, s2, s5 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s2, s5, s2 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s4, s5, s2 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: urem25_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; VI-NEXT: v_readfirstlane_b32 
s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s4, s6, s5 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: urem25_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.X, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -305,14 +1673,105 @@ define amdgpu_kernel void @urem25_i32(ptr addrspace(1) %out, ptr addrspace(1) %i ret void } -; FUNC-LABEL: {{^}}test_no_urem24_i32_1: ; RCP_IFLAG is for URECIP in the 
full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @test_no_urem24_i32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: test_no_urem24_i32_1: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s2, s4, 0xffffff +; SI-NEXT: s_and_b32 s4, s5, 0x1ffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: s_sub_i32 s5, 0, s4 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s5, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s2, v0 +; SI-NEXT: v_readfirstlane_b32 s5, v0 +; SI-NEXT: s_mul_i32 s5, s5, s4 +; SI-NEXT: s_sub_i32 s2, s2, s5 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s2, s5, s2 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s4, s5, s2 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_no_urem24_i32_1: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0x1ffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0xffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; 
VI-NEXT: v_readfirstlane_b32 s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s4, s6, s5 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_no_urem24_i32_1: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.X, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -325,14 +1784,105 @@ define amdgpu_kernel void @test_no_urem24_i32_1(ptr addrspace(1) %out, ptr addrs ret void } -; FUNC-LABEL: 
{{^}}test_no_urem24_i32_2: ; RCP_IFLAG is for URECIP in the full 32b alg -; SI: v_rcp_iflag -; SI-NOT: v_rcp_f32 - -; EG-NOT: UINT_TO_FLT -; EG-NOT: RECIP_IEEE define amdgpu_kernel void @test_no_urem24_i32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: test_no_urem24_i32_2: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s2, s4, 0x1ffffff +; SI-NEXT: s_and_b32 s4, s5, 0xffffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: s_sub_i32 s5, 0, s4 +; SI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; SI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; SI-NEXT: v_mul_lo_u32 v1, s5, v0 +; SI-NEXT: v_mul_hi_u32 v1, v0, v1 +; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; SI-NEXT: v_mul_hi_u32 v0, s2, v0 +; SI-NEXT: v_readfirstlane_b32 s5, v0 +; SI-NEXT: s_mul_i32 s5, s5, s4 +; SI-NEXT: s_sub_i32 s2, s2, s5 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s2, s5, s2 +; SI-NEXT: s_sub_i32 s5, s2, s4 +; SI-NEXT: s_cmp_ge_u32 s2, s4 +; SI-NEXT: s_cselect_b32 s4, s5, s2 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: v_mov_b32_e32 v0, s4 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_no_urem24_i32_2: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s4, s3, 0xffffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; VI-NEXT: s_sub_i32 s3, 0, s4 +; VI-NEXT: s_and_b32 s5, s2, 0x1ffffff +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; VI-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; VI-NEXT: v_cvt_u32_f32_e32 v0, v0 +; VI-NEXT: v_mul_lo_u32 v1, s3, v0 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_mul_hi_u32 v1, v0, v1 +; VI-NEXT: 
v_add_u32_e32 v0, vcc, v0, v1 +; VI-NEXT: v_mul_hi_u32 v0, s5, v0 +; VI-NEXT: v_readfirstlane_b32 s6, v0 +; VI-NEXT: s_mul_i32 s6, s6, s4 +; VI-NEXT: s_sub_i32 s5, s5, s6 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s5, s6, s5 +; VI-NEXT: s_sub_i32 s6, s5, s4 +; VI-NEXT: s_cmp_ge_u32 s5, s4 +; VI-NEXT: s_cselect_b32 s4, s6, s5 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_no_urem24_i32_2: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00) +; EG-NEXT: SUB_INT T1.W, 0.0, PV.W, +; EG-NEXT: RECIP_UINT * T0.Y, PV.W, +; EG-NEXT: MULLO_INT * T0.Z, PV.W, PS, +; EG-NEXT: MULHI * T0.Z, T0.Y, PS, +; EG-NEXT: ADD_INT T1.W, T0.Y, PS, +; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, +; EG-NEXT: 33554431(9.403954e-38), 0(0.000000e+00) +; EG-NEXT: MULHI * T0.X, PS, PV.W, +; EG-NEXT: MULLO_INT * T0.X, PS, T0.W, +; EG-NEXT: SUB_INT * T1.W, T2.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T3.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT * T1.W, PV.W, T1.W, PS, +; EG-NEXT: SETGE_UINT T2.W, PV.W, T0.W, +; EG-NEXT: SUB_INT * T0.W, PV.W, T0.W, +; EG-NEXT: CNDE_INT T0.X, PV.W, T1.W, PS, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -345,12 +1895,85 @@ define amdgpu_kernel void @test_no_urem24_i32_2(ptr 
addrspace(1) %out, ptr addrs ret void } -; FUNC-LABEL: {{^}}test_udiv24_u16_u23_i32: -; SI: v_rcp_iflag_f32 -; SI: v_and_b32_e32 v{{[0-9]+}}, 0x7fffff, - -; EG: RECIP_IEEE define amdgpu_kernel void @test_udiv24_u16_u23_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: test_udiv24_u16_u23_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0xffff +; SI-NEXT: s_and_b32 s5, s5, 0x7fffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_udiv24_u16_u23_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0xffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_udiv24_u16_u23_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, 
KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W, +; EG-NEXT: AND_INT T0.W, T0.X, literal.x, +; EG-NEXT: RECIP_IEEE * T0.X, PS, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: AND_INT T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 @@ -363,12 +1986,85 @@ define amdgpu_kernel void @test_udiv24_u16_u23_i32(ptr addrspace(1) %out, ptr ad ret void } -; FUNC-LABEL: {{^}}test_udiv24_u23_u16_i32: -; SI: v_rcp_iflag_f32 -; SI: v_and_b32_e32 v{{[0-9]+}}, 0x7fffff, - -; EG: RECIP_IEEE define amdgpu_kernel void @test_udiv24_u23_u16_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; SI-LABEL: test_udiv24_u23_u16_i32: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_and_b32 s4, s4, 0x7fffff +; SI-NEXT: 
s_and_b32 s5, s5, 0xffff +; SI-NEXT: v_cvt_f32_u32_e32 v0, s4 +; SI-NEXT: v_cvt_f32_u32_e32 v1, s5 +; SI-NEXT: v_rcp_iflag_f32_e32 v2, v1 +; SI-NEXT: v_mul_f32_e32 v2, v0, v2 +; SI-NEXT: v_trunc_f32_e32 v2, v2 +; SI-NEXT: v_fma_f32 v0, -v2, v1, v0 +; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 +; SI-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, v1 +; SI-NEXT: v_addc_u32_e32 v0, vcc, 0, v2, vcc +; SI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: test_udiv24_u23_u16_i32: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_and_b32 s3, s3, 0xffff +; VI-NEXT: v_cvt_f32_u32_e32 v0, s3 +; VI-NEXT: s_and_b32 s2, s2, 0x7fffff +; VI-NEXT: v_cvt_f32_u32_e32 v1, s2 +; VI-NEXT: s_mov_b32 s3, 0xf000 +; VI-NEXT: v_rcp_iflag_f32_e32 v2, v0 +; VI-NEXT: s_mov_b32 s2, -1 +; VI-NEXT: v_mul_f32_e32 v2, v1, v2 +; VI-NEXT: v_trunc_f32_e32 v2, v2 +; VI-NEXT: v_cvt_u32_f32_e32 v3, v2 +; VI-NEXT: v_mad_f32 v1, -v2, v0, v1 +; VI-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, v0 +; VI-NEXT: v_addc_u32_e32 v0, vcc, 0, v3, vcc +; VI-NEXT: v_and_b32_e32 v0, 0x7fffff, v0 +; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-NEXT: s_endpgm +; +; EG-LABEL: test_udiv24_u23_u16_i32: +; EG: ; %bb.0: +; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] +; EG-NEXT: TEX 0 @6 +; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] +; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; EG-NEXT: CF_END +; EG-NEXT: PAD +; EG-NEXT: Fetch clause starting at 6: +; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 +; EG-NEXT: ALU clause starting at 8: +; EG-NEXT: MOV * T0.X, KC0[2].Z, +; EG-NEXT: ALU clause starting at 9: +; EG-NEXT: AND_INT * T0.W, T0.Y, literal.x, +; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Y, PV.W, +; EG-NEXT: AND_INT T0.W, T0.X, literal.x, +; EG-NEXT: RECIP_IEEE * T0.X, PS, +; EG-NEXT: 
8388607(1.175494e-38), 0(0.000000e+00) +; EG-NEXT: UINT_TO_FLT * T0.Z, PV.W, +; EG-NEXT: MUL_IEEE * T0.W, PS, T0.X, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: MULADD_IEEE T1.W, -PV.W, T0.Y, T0.Z, +; EG-NEXT: TRUNC * T0.W, PV.W, +; EG-NEXT: SETGE * T1.W, |PV.W|, T0.Y, +; EG-NEXT: CNDE T1.W, PV.W, 0.0, literal.x, +; EG-NEXT: FLT_TO_UINT * T0.X, T0.W, +; EG-NEXT: 1(1.401298e-45), 0(0.000000e+00) +; EG-NEXT: ADD_INT * T0.W, PS, PV.W, +; EG-NEXT: AND_INT T0.X, PV.W, literal.x, +; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; EG-NEXT: 8388607(1.175494e-38), 2(2.802597e-45) %den_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 %num = load i32, ptr addrspace(1) %in, align 4 %den = load i32, ptr addrspace(1) %den_ptr, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll new file mode 100644 index 0000000..ea12732 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/use-after-free-after-cleanup-failed-vreg.ll @@ -0,0 +1,16 @@ +; RUN: not llc -mcpu=gfx1100 -mtriple=amdgcn-amd-amdhsa -stress-regalloc=4 -filetype=null -verify-machineinstrs %s 2>&1 | FileCheck %s + +; CHECK: error: <unknown>:0:0: ran out of registers during register allocation in function 'f' +; CHECK-NOT: Bad machine code + +define <16 x half> @f(i1 %LGV2, <16 x half> %0) { +BB: + br i1 %LGV2, label %SW_C3, label %SW_C + +SW_C: ; preds = %BB + %B1 = fmul <16 x half> %0, zeroinitializer + ret <16 x half> %B1 + +SW_C3: ; preds = %BB + ret <16 x half> <half 0xH0000, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison, half poison> +} diff --git a/llvm/test/CodeGen/ARM/cmp-select-sign.ll b/llvm/test/CodeGen/ARM/cmp-select-sign.ll index 298a623..61cdc3b 100644 --- a/llvm/test/CodeGen/ARM/cmp-select-sign.ll +++ b/llvm/test/CodeGen/ARM/cmp-select-sign.ll @@ -75,31 +75,31 @@ define i4 
@sign_i4(i4 %a) { define i8 @sign_i8(i8 %a) { ; ARM-LABEL: sign_i8: ; ARM: @ %bb.0: -; ARM-NEXT: lsl r0, r0, #24 +; ARM-NEXT: sxtb r0, r0 ; ARM-NEXT: mov r1, #1 -; ARM-NEXT: orr r0, r1, r0, asr #31 +; ARM-NEXT: orr r0, r1, r0, asr #7 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: sign_i8: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r0, r0, #24 -; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: sxtb r0, r0 +; THUMB-NEXT: asrs r1, r0, #7 ; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: orrs r0, r1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: sign_i8: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r0, #24 +; THUMB2-NEXT: sxtb r0, r0 ; THUMB2-NEXT: movs r1, #1 -; THUMB2-NEXT: orr.w r0, r1, r0, asr #31 +; THUMB2-NEXT: orr.w r0, r1, r0, asr #7 ; THUMB2-NEXT: bx lr ; ; THUMBV8-LABEL: sign_i8: ; THUMBV8: @ %bb.0: -; THUMBV8-NEXT: lsls r0, r0, #24 +; THUMBV8-NEXT: sxtb r0, r0 ; THUMBV8-NEXT: movs r1, #1 -; THUMBV8-NEXT: orr.w r0, r1, r0, asr #31 +; THUMBV8-NEXT: orr.w r0, r1, r0, asr #7 ; THUMBV8-NEXT: bx lr %c = icmp sgt i8 %a, -1 %res = select i1 %c, i8 1, i8 -1 @@ -109,31 +109,31 @@ define i8 @sign_i8(i8 %a) { define i16 @sign_i16(i16 %a) { ; ARM-LABEL: sign_i16: ; ARM: @ %bb.0: -; ARM-NEXT: lsl r0, r0, #16 +; ARM-NEXT: sxth r0, r0 ; ARM-NEXT: mov r1, #1 -; ARM-NEXT: orr r0, r1, r0, asr #31 +; ARM-NEXT: orr r0, r1, r0, asr #15 ; ARM-NEXT: bx lr ; ; THUMB-LABEL: sign_i16: ; THUMB: @ %bb.0: -; THUMB-NEXT: lsls r0, r0, #16 -; THUMB-NEXT: asrs r1, r0, #31 +; THUMB-NEXT: sxth r0, r0 +; THUMB-NEXT: asrs r1, r0, #15 ; THUMB-NEXT: movs r0, #1 ; THUMB-NEXT: orrs r0, r1 ; THUMB-NEXT: bx lr ; ; THUMB2-LABEL: sign_i16: ; THUMB2: @ %bb.0: -; THUMB2-NEXT: lsls r0, r0, #16 +; THUMB2-NEXT: sxth r0, r0 ; THUMB2-NEXT: movs r1, #1 -; THUMB2-NEXT: orr.w r0, r1, r0, asr #31 +; THUMB2-NEXT: orr.w r0, r1, r0, asr #15 ; THUMB2-NEXT: bx lr ; ; THUMBV8-LABEL: sign_i16: ; THUMBV8: @ %bb.0: -; THUMBV8-NEXT: lsls r0, r0, #16 +; THUMBV8-NEXT: sxth r0, r0 ; THUMBV8-NEXT: movs r1, #1 -; THUMBV8-NEXT: orr.w r0, r1, r0, asr #31 +; THUMBV8-NEXT: 
orr.w r0, r1, r0, asr #15 ; THUMBV8-NEXT: bx lr %c = icmp sgt i16 %a, -1 %res = select i1 %c, i16 1, i16 -1 diff --git a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll index cda1e83..aa3cdc3 100644 --- a/llvm/test/CodeGen/ARM/nop_concat_vectors.ll +++ b/llvm/test/CodeGen/ARM/nop_concat_vectors.ll @@ -1,10 +1,10 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s -;CHECK: _foo -;CHECK-NOT: vld1.32 -;CHECK-NOT: vst1.32 -;CHECK: bx define void @foo(ptr %J) { +; CHECK-LABEL: foo: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr %A = load <16 x i8>, ptr %J %T1 = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %T2 = shufflevector <8 x i8> %T1, <8 x i8> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> diff --git a/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll new file mode 100644 index 0000000..9988d5b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/byval-arg-vectorize.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mcpu=sm_70 | FileCheck %s +; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %} + +target triple = "nvptx64-nvidia-cuda" + +%struct.double2 = type { double, double } + +declare %struct.double2 @add(ptr align(16) byval(%struct.double2), ptr align(16) byval(%struct.double2)) + +define void @call_byval(ptr %out, ptr %in1, ptr %in2) { +; CHECK-LABEL: call_byval( +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<12>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [call_byval_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .align 16 .b8 param0[16]; +; 
CHECK-NEXT: .param .align 16 .b8 param1[16]; +; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: ld.param.b64 %rd2, [call_byval_param_2]; +; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd2]; +; CHECK-NEXT: st.param.v2.b64 [param1], {%rd3, %rd4}; +; CHECK-NEXT: ld.param.b64 %rd5, [call_byval_param_1]; +; CHECK-NEXT: ld.v2.b64 {%rd6, %rd7}, [%rd5]; +; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd7}; +; CHECK-NEXT: call.uni (retval0), add, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd8, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd9, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: st.b64 [%rd1+8], %rd8; +; CHECK-NEXT: st.b64 [%rd1], %rd9; +; CHECK-NEXT: ret; + %call = call %struct.double2 @add(ptr align(16) byval(%struct.double2) %in1, ptr align(16) byval(%struct.double2) %in2) + store %struct.double2 %call, ptr %out, align 16 + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll index 48209a8..dd3e4ec 100644 --- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll +++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll @@ -12,14 +12,14 @@ define %struct.64 @test_return_type_mismatch(ptr %p) { ; CHECK-NEXT: .reg .b64 %rd<40>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_return_type_mismatch_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: .param .align 1 .b8 retval0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: mov.b64 %rd1, callee; -; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0; +; CHECK-NEXT: mov.b64 %rd2, callee; +; CHECK-NEXT: call (retval0), %rd2, (param0), prototype_0; ; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; ; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; ; 
CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; @@ -90,16 +90,16 @@ define i64 @test_param_count_mismatch(ptr %p) { ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_param_0]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: .param .b64 param1; ; CHECK-NEXT: .param .b64 retval0; -; CHECK-NEXT: st.param.b64 [param0], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _); ; CHECK-NEXT: st.param.b64 [param1], 7; -; CHECK-NEXT: mov.b64 %rd1, callee; -; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2; +; CHECK-NEXT: mov.b64 %rd2, callee; +; CHECK-NEXT: call (retval0), %rd2, (param0, param1), prototype_2; ; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; @@ -114,14 +114,14 @@ define %struct.64 @test_return_type_mismatch_variadic(ptr %p) { ; CHECK-NEXT: .reg .b64 %rd<40>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_return_type_mismatch_variadic_param_0]; ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .b64 param0; ; CHECK-NEXT: .param .align 1 .b8 retval0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: mov.b64 %rd1, callee_variadic; -; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3; +; CHECK-NEXT: mov.b64 %rd2, callee_variadic; +; CHECK-NEXT: call (retval0), %rd2, (param0), prototype_3; ; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; ; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; ; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; diff --git 
a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 38185c7b..045704b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -124,15 +124,15 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-NEXT: .reg .b64 %rd<4>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0; -; PTX-NEXT: cvta.param.u64 %rd3, %rd2; +; PTX-NEXT: mov.b64 %rd1, grid_const_escape_param_0; +; PTX-NEXT: cvta.param.u64 %rd2, %rd1; ; PTX-NEXT: { // callseq 0, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: .param .b32 retval0; -; PTX-NEXT: st.param.b64 [param0], %rd3; +; PTX-NEXT: st.param.b64 [param0], %rd2; ; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: mov.b64 %rd1, escape; -; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0; +; PTX-NEXT: mov.b64 %rd3, escape; +; PTX-NEXT: call (retval0), %rd3, (param0), prototype_0; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_escape( @@ -157,25 +157,25 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: // %bb.0: ; PTX-NEXT: mov.b64 %SPL, __local_depot4; ; PTX-NEXT: cvta.local.u64 %SP, %SPL; -; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0; +; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0; ; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1]; -; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2; -; PTX-NEXT: cvta.param.u64 %rd4, %rd3; -; PTX-NEXT: cvta.param.u64 %rd5, %rd2; -; PTX-NEXT: add.u64 %rd6, %SP, 0; -; PTX-NEXT: add.u64 %rd7, %SPL, 0; -; PTX-NEXT: st.local.b32 [%rd7], %r1; +; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2; +; PTX-NEXT: cvta.param.u64 %rd3, %rd2; +; PTX-NEXT: cvta.param.u64 %rd4, %rd1; +; PTX-NEXT: add.u64 %rd5, %SP, 0; +; PTX-NEXT: add.u64 
%rd6, %SPL, 0; +; PTX-NEXT: st.local.b32 [%rd6], %r1; ; PTX-NEXT: { // callseq 1, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: .param .b64 param1; ; PTX-NEXT: .param .b64 param2; ; PTX-NEXT: .param .b32 retval0; -; PTX-NEXT: st.param.b64 [param2], %rd4; -; PTX-NEXT: st.param.b64 [param1], %rd6; -; PTX-NEXT: st.param.b64 [param0], %rd5; +; PTX-NEXT: st.param.b64 [param2], %rd3; +; PTX-NEXT: st.param.b64 [param1], %rd5; +; PTX-NEXT: st.param.b64 [param0], %rd4; ; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _); -; PTX-NEXT: mov.b64 %rd1, escape3; -; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1; +; PTX-NEXT: mov.b64 %rd7, escape3; +; PTX-NEXT: call (retval0), %rd7, (param0, param1, param2), prototype_1; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape( @@ -256,20 +256,20 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou ; PTX-NEXT: .reg .b64 %rd<6>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0; -; PTX-NEXT: ld.param.b64 %rd3, [grid_const_partial_escape_param_1]; -; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3; -; PTX-NEXT: cvta.param.u64 %rd5, %rd2; +; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escape_param_0; +; PTX-NEXT: ld.param.b64 %rd2, [grid_const_partial_escape_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: cvta.param.u64 %rd4, %rd1; ; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0]; ; PTX-NEXT: add.s32 %r2, %r1, %r1; -; PTX-NEXT: st.global.b32 [%rd4], %r2; +; PTX-NEXT: st.global.b32 [%rd3], %r2; ; PTX-NEXT: { // callseq 2, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: .param .b32 retval0; -; PTX-NEXT: st.param.b64 [param0], %rd5; +; PTX-NEXT: st.param.b64 [param0], %rd4; ; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: mov.b64 %rd1, escape; -; PTX-NEXT: call (retval0), 
%rd1, (param0), prototype_2; +; PTX-NEXT: mov.b64 %rd5, escape; +; PTX-NEXT: call (retval0), %rd5, (param0), prototype_2; ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape( @@ -295,21 +295,21 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ; PTX-NEXT: .reg .b64 %rd<6>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0; -; PTX-NEXT: ld.param.b64 %rd3, [grid_const_partial_escapemem_param_1]; -; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3; -; PTX-NEXT: cvta.param.u64 %rd5, %rd2; +; PTX-NEXT: mov.b64 %rd1, grid_const_partial_escapemem_param_0; +; PTX-NEXT: ld.param.b64 %rd2, [grid_const_partial_escapemem_param_1]; +; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2; +; PTX-NEXT: cvta.param.u64 %rd4, %rd1; ; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escapemem_param_0]; ; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4]; -; PTX-NEXT: st.global.b64 [%rd4], %rd5; +; PTX-NEXT: st.global.b64 [%rd3], %rd4; ; PTX-NEXT: add.s32 %r3, %r1, %r2; ; PTX-NEXT: { // callseq 3, 0 ; PTX-NEXT: .param .b64 param0; ; PTX-NEXT: .param .b32 retval0; -; PTX-NEXT: st.param.b64 [param0], %rd5; +; PTX-NEXT: st.param.b64 [param0], %rd4; ; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _); -; PTX-NEXT: mov.b64 %rd1, escape; -; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3; +; PTX-NEXT: mov.b64 %rd5, escape; +; PTX-NEXT: call (retval0), %rd5, (param0), prototype_3; ; PTX-NEXT: } // callseq 3 ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index a592b82..51f6b00 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -150,8 +150,8 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK: ) 
; CHECK: .param .align 16 .b8 param0[12]; ; CHECK: .param .align 16 .b8 retval0[12]; - ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; + ; CHECK-DAG: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.b32 [param0+8], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), callee_St4x3, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; @@ -240,8 +240,8 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[20]; ; CHECK: .param .align 16 .b8 retval0[20]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; + ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.b32 [param0+16], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), callee_St4x5, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; @@ -296,8 +296,8 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; ; CHECK: .param .align 16 .b8 retval0[24]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x6, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -358,9 +358,9 @@ define dso_local void @caller_St4x7(ptr 
nocapture noundef readonly byval(%struct ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[28]; ; CHECK: .param .align 16 .b8 retval0[28]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; + ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.b32 [param0+24], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), callee_St4x7, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -566,8 +566,8 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; ; CHECK: .param .align 16 .b8 retval0[24]; - ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; - ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; + ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK-DAG: st.param.b64 [param0+16], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), callee_St8x3, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; diff --git a/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll b/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll new file mode 100644 index 0000000..c119da6 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-lower-arbitrary-sized-ints.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK32 +; RUN: llc --verify-machineinstrs 
-mtriple powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr8 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK64 + +define ptr @lower_args(ptr %_0, i32 %0, i32 %1, i32 %2, i32 %3, ptr %4, ptr %5, i64 %6, i24 %7) { +; CHECK-LABEL: lower_args: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: blr +entry: + ret ptr %_0 +} + +define i32 @lower_args_withops_zeroext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i24 %i) { +; CHECK32-LABEL: lower_args_withops_zeroext: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz r3, 56(r1) +; CHECK32-NEXT: addi r3, r3, 255 +; CHECK32-NEXT: clrlwi r3, r3, 8 +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: lower_args_withops_zeroext: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: lwz r3, 116(r1) +; CHECK64-NEXT: addi r3, r3, 255 +; CHECK64-NEXT: clrldi r3, r3, 40 +; CHECK64-NEXT: blr +entry: + %0 = add i24 %i, 255 + %1 = zext i24 %0 to i32 + ret i32 %1 +} + +define i32 @lower_args_withops_signext(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i24 signext %i) { +; CHECK32-LABEL: lower_args_withops_signext: +; CHECK32: # %bb.0: # %entry +; CHECK32-NEXT: lwz r3, 56(r1) +; CHECK32-NEXT: slwi r3, r3, 8 +; CHECK32-NEXT: srawi r3, r3, 8 +; CHECK32-NEXT: slwi r3, r3, 8 +; CHECK32-NEXT: addi r3, r3, 22272 +; CHECK32-NEXT: srawi r3, r3, 8 +; CHECK32-NEXT: blr +; +; CHECK64-LABEL: lower_args_withops_signext: +; CHECK64: # %bb.0: # %entry +; CHECK64-NEXT: lwz r3, 116(r1) +; CHECK64-NEXT: slwi r3, r3, 8 +; CHECK64-NEXT: srawi r3, r3, 8 +; CHECK64-NEXT: addi r3, r3, 87 +; CHECK64-NEXT: sldi r3, r3, 40 +; CHECK64-NEXT: sradi r3, r3, 40 +; CHECK64-NEXT: blr +entry: + %0 = add i24 %i, 87 + %1 = sext i24 %0 to i32 + ret i32 %1 +} diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll index 9f62477..af0942e 100644 --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -56,155 +56,153 @@ 
define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: .cfi_offset v29, -240 ; CHECK-NEXT: .cfi_offset v30, -224 ; CHECK-NEXT: .cfi_offset v31, -208 +; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 2, 728(1) +; CHECK-NEXT: ld 14, 688(1) +; CHECK-NEXT: ld 11, 704(1) +; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 21, 5 +; CHECK-NEXT: lwa 5, 0(7) +; CHECK-NEXT: ld 7, 720(1) ; CHECK-NEXT: std 22, 464(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, 472(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 22, 5 -; CHECK-NEXT: ld 5, 848(1) +; CHECK-NEXT: mr 22, 6 +; CHECK-NEXT: ld 6, 848(1) ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: mr 11, 7 -; CHECK-NEXT: ld 23, 688(1) -; CHECK-NEXT: ld 7, 728(1) +; CHECK-NEXT: ld 15, 736(1) ; CHECK-NEXT: std 18, 432(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 440(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 18, 6 -; CHECK-NEXT: li 6, 9 ; CHECK-NEXT: ld 19, 768(1) -; CHECK-NEXT: ld 2, 760(1) -; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill -; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: ld 27, 816(1) -; CHECK-NEXT: ld 26, 808(1) -; CHECK-NEXT: std 14, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 15, 736(1) -; CHECK-NEXT: lxv 39, 0(8) +; CHECK-NEXT: ld 18, 760(1) ; CHECK-NEXT: std 30, 528(1) # 8-byte Folded Spill ; CHECK-NEXT: std 31, 536(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 30, 704(1) -; CHECK-NEXT: lxv 38, 0(9) -; CHECK-NEXT: std 20, 448(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 456(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 21, 784(1) +; CHECK-NEXT: ld 12, 696(1) +; CHECK-NEXT: lxv 0, 0(9) +; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 1, 0(8) +; CHECK-NEXT: cmpldi 3, 9 +; 
CHECK-NEXT: ld 30, 824(1) +; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 29, 840(1) +; CHECK-NEXT: ld 28, 832(1) +; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 23, 784(1) ; CHECK-NEXT: ld 20, 776(1) ; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill ; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill -; CHECK-NEXT: iselgt 3, 3, 6 -; CHECK-NEXT: ld 6, 720(1) +; CHECK-NEXT: ld 25, 800(1) ; CHECK-NEXT: ld 24, 792(1) -; CHECK-NEXT: std 10, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 7, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 496(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 504(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 27, 816(1) +; CHECK-NEXT: ld 26, 808(1) +; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 17, 752(1) +; CHECK-NEXT: extswsli 9, 5, 3 +; CHECK-NEXT: lxv 4, 0(14) +; CHECK-NEXT: std 14, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 12, 40(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 0, 5, 40 +; CHECK-NEXT: sldi 14, 5, 5 +; CHECK-NEXT: mulli 31, 5, 24 +; CHECK-NEXT: lxv 38, 0(2) +; CHECK-NEXT: lxv 2, 0(11) +; CHECK-NEXT: std 2, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 2, 5, 48 +; CHECK-NEXT: sldi 5, 5, 4 +; CHECK-NEXT: ld 16, 744(1) +; CHECK-NEXT: lxv 5, 0(10) +; CHECK-NEXT: std 6, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: ld 6, 712(1) +; CHECK-NEXT: mr 10, 7 +; CHECK-NEXT: add 7, 14, 21 +; CHECK-NEXT: lxv 13, 0(19) +; CHECK-NEXT: std 8, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 6, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 11 +; CHECK-NEXT: li 11, 9 +; CHECK-NEXT: iselgt 3, 3, 11 ; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: lxv 6, 0(19) -; CHECK-NEXT: lxv 11, 0(7) -; CHECK-NEXT: std 5, 200(1) # 
8-byte Folded Spill -; CHECK-NEXT: std 23, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 6, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 840(1) -; CHECK-NEXT: lxv 12, 0(6) -; CHECK-NEXT: rldicl 12, 3, 61, 3 +; CHECK-NEXT: rldicl 11, 3, 61, 3 +; CHECK-NEXT: lxv 3, 0(12) +; CHECK-NEXT: lxv 40, 0(6) +; CHECK-NEXT: std 18, 112(1) # 8-byte Folded Spill ; CHECK-NEXT: std 19, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: add 19, 21, 5 +; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: lxv 39, 0(10) +; CHECK-NEXT: addi 3, 7, 32 +; CHECK-NEXT: add 12, 31, 21 ; CHECK-NEXT: std 20, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 4, 0(21) -; CHECK-NEXT: ld 25, 800(1) -; CHECK-NEXT: lxv 33, 0(10) -; CHECK-NEXT: lxv 32, 0(23) -; CHECK-NEXT: lxv 36, 0(30) -; CHECK-NEXT: std 16, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 17, 752(1) -; CHECK-NEXT: ld 16, 744(1) -; CHECK-NEXT: std 28, 512(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 520(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 29, 712(1) -; CHECK-NEXT: ld 28, 696(1) -; CHECK-NEXT: std 8, 56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 9, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 37, 0(28) -; CHECK-NEXT: lxv 13, 0(29) -; CHECK-NEXT: mr 8, 29 -; CHECK-NEXT: mr 9, 30 -; CHECK-NEXT: mr 10, 28 -; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 33, 0(15) +; CHECK-NEXT: lxv 32, 0(16) ; CHECK-NEXT: std 26, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 10, 0(15) -; CHECK-NEXT: lxv 9, 0(16) -; CHECK-NEXT: li 28, 1 -; CHECK-NEXT: stfd 26, 544(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 552(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 8, 0(17) -; CHECK-NEXT: lxv 7, 0(2) +; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 37, 0(17) +; CHECK-NEXT: lxv 36, 0(18) +; 
CHECK-NEXT: std 30, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 12, 0(20) +; CHECK-NEXT: lxv 11, 0(23) +; CHECK-NEXT: add 20, 21, 9 ; CHECK-NEXT: stfd 28, 560(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 29, 568(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 5, 0(20) -; CHECK-NEXT: lxv 3, 0(24) +; CHECK-NEXT: lxv 10, 0(24) +; CHECK-NEXT: lxv 9, 0(25) ; CHECK-NEXT: stfd 30, 576(1) # 8-byte Folded Spill ; CHECK-NEXT: stfd 31, 584(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 2, 0(25) -; CHECK-NEXT: lxv 1, 0(26) +; CHECK-NEXT: lxv 8, 0(26) +; CHECK-NEXT: lxv 7, 0(27) +; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: li 27, 0 +; CHECK-NEXT: mr 26, 21 ; CHECK-NEXT: stxv 52, 208(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 53, 224(1) # 16-byte Folded Spill -; CHECK-NEXT: lxv 0, 0(27) +; CHECK-NEXT: lxv 6, 0(30) +; CHECK-NEXT: lxv 41, 0(28) +; CHECK-NEXT: addi 7, 11, 1 +; CHECK-NEXT: add 11, 0, 21 +; CHECK-NEXT: li 28, 1 ; CHECK-NEXT: stxv 54, 240(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 55, 256(1) # 16-byte Folded Spill +; CHECK-NEXT: lxv 43, 0(29) +; CHECK-NEXT: lxv 42, 0(5) ; CHECK-NEXT: stxv 56, 272(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 57, 288(1) # 16-byte Folded Spill +; CHECK-NEXT: addi 11, 11, 32 ; CHECK-NEXT: stxv 58, 304(1) # 16-byte Folded Spill -; CHECK-NEXT: std 5, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 832(1) ; CHECK-NEXT: stxv 59, 320(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 60, 336(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 61, 352(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 62, 368(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 63, 384(1) # 16-byte Folded Spill -; CHECK-NEXT: std 15, 88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 16, 96(1) # 8-byte Folded Spill ; CHECK-NEXT: std 17, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 5, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 5, 824(1) -; CHECK-NEXT: std 5, 176(1) # 8-byte Folded 
Spill -; CHECK-NEXT: std 27, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: lwa 5, 0(11) -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: ld 7, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: mulli 6, 5, 40 -; CHECK-NEXT: sldi 0, 5, 4 -; CHECK-NEXT: extswsli 14, 5, 3 -; CHECK-NEXT: lxv 40, 0(7) -; CHECK-NEXT: ld 7, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: add 31, 14, 22 -; CHECK-NEXT: add 11, 0, 22 -; CHECK-NEXT: mr 26, 22 -; CHECK-NEXT: addi 3, 11, 32 -; CHECK-NEXT: addi 11, 12, 1 -; CHECK-NEXT: mulli 12, 5, 48 -; CHECK-NEXT: addi 31, 31, 32 -; CHECK-NEXT: add 19, 22, 6 -; CHECK-NEXT: sldi 6, 5, 5 -; CHECK-NEXT: mulli 5, 5, 24 -; CHECK-NEXT: lxv 41, 0(7) -; CHECK-NEXT: add 20, 22, 6 -; CHECK-NEXT: add 21, 22, 5 -; CHECK-NEXT: ld 5, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 43, 0(5) -; CHECK-NEXT: ld 5, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: lxv 42, 0(5) +; CHECK-NEXT: std 24, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 152(1) # 8-byte Folded Spill ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 5, 12, 27, 0 -; CHECK-NEXT: mr 6, 18 -; CHECK-NEXT: mr 29, 21 +; CHECK-NEXT: maddld 5, 2, 27, 0 +; CHECK-NEXT: mr 6, 22 ; CHECK-NEXT: mr 30, 20 -; CHECK-NEXT: mr 2, 19 -; CHECK-NEXT: mtctr 11 -; CHECK-NEXT: add 25, 22, 5 -; CHECK-NEXT: maddld 5, 12, 27, 14 -; CHECK-NEXT: add 24, 22, 5 +; CHECK-NEXT: mr 29, 19 +; CHECK-NEXT: mtctr 7 +; CHECK-NEXT: add 25, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 14 +; CHECK-NEXT: add 24, 21, 5 +; CHECK-NEXT: maddld 5, 2, 27, 31 +; CHECK-NEXT: add 23, 21, 5 ; CHECK-NEXT: mr 5, 26 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ @@ -212,66 +210,66 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 34, 0(6) ; CHECK-NEXT: lxvp 44, 0(5) -; CHECK-NEXT: xvmaddadp 39, 45, 35 -; CHECK-NEXT: lxvp 46, 0(24) -; CHECK-NEXT: xvmaddadp 38, 47, 35 -; CHECK-NEXT: lxvp 48, 0(25) -; CHECK-NEXT: lxvp 50, 0(29) -; CHECK-NEXT: lxvp 62, 0(30) -; CHECK-NEXT: lxvp 60, 0(2) +; CHECK-NEXT: xvmaddadp 1, 45, 35 +; CHECK-NEXT: lxvp 46, 0(30) +; CHECK-NEXT: xvmaddadp 0, 47, 35 +; CHECK-NEXT: lxvp 48, 0(29) +; CHECK-NEXT: lxvp 50, 0(23) +; CHECK-NEXT: lxvp 62, 0(24) +; CHECK-NEXT: lxvp 60, 0(25) ; CHECK-NEXT: lxvp 58, 32(6) ; CHECK-NEXT: lxvp 56, 32(5) -; CHECK-NEXT: lxvp 54, 32(24) -; CHECK-NEXT: lxvp 52, 32(25) -; CHECK-NEXT: lxvp 30, 32(29) -; CHECK-NEXT: lxvp 28, 32(30) -; CHECK-NEXT: lxvp 26, 32(2) -; CHECK-NEXT: xvmaddadp 33, 49, 35 -; CHECK-NEXT: xvmaddadp 32, 51, 35 -; CHECK-NEXT: xvmaddadp 37, 63, 35 -; CHECK-NEXT: xvmaddadp 36, 61, 35 -; CHECK-NEXT: xvmaddadp 13, 44, 34 -; CHECK-NEXT: xvmaddadp 12, 46, 34 -; CHECK-NEXT: xvmaddadp 11, 48, 34 -; CHECK-NEXT: xvmaddadp 10, 50, 34 -; CHECK-NEXT: xvmaddadp 9, 62, 34 -; CHECK-NEXT: xvmaddadp 8, 60, 34 -; CHECK-NEXT: xvmaddadp 7, 57, 59 -; CHECK-NEXT: xvmaddadp 6, 55, 59 -; CHECK-NEXT: xvmaddadp 5, 53, 59 -; CHECK-NEXT: xvmaddadp 4, 31, 59 -; CHECK-NEXT: xvmaddadp 3, 29, 59 -; CHECK-NEXT: xvmaddadp 2, 27, 59 -; CHECK-NEXT: xvmaddadp 1, 56, 58 -; CHECK-NEXT: xvmaddadp 0, 54, 58 -; CHECK-NEXT: xvmaddadp 40, 52, 58 +; CHECK-NEXT: lxvp 54, 32(30) +; CHECK-NEXT: lxvp 52, 32(29) +; CHECK-NEXT: lxvp 30, 32(23) +; CHECK-NEXT: lxvp 28, 32(24) +; CHECK-NEXT: lxvp 26, 32(25) +; CHECK-NEXT: xvmaddadp 5, 49, 35 +; CHECK-NEXT: xvmaddadp 4, 51, 35 +; CHECK-NEXT: xvmaddadp 3, 63, 35 +; CHECK-NEXT: xvmaddadp 2, 61, 35 +; CHECK-NEXT: xvmaddadp 40, 44, 34 +; CHECK-NEXT: xvmaddadp 39, 46, 34 +; CHECK-NEXT: xvmaddadp 38, 48, 34 +; CHECK-NEXT: xvmaddadp 33, 50, 34 +; CHECK-NEXT: xvmaddadp 32, 62, 34 +; CHECK-NEXT: xvmaddadp 37, 60, 34 +; CHECK-NEXT: xvmaddadp 36, 57, 59 +; CHECK-NEXT: 
xvmaddadp 13, 55, 59 +; CHECK-NEXT: xvmaddadp 12, 53, 59 +; CHECK-NEXT: xvmaddadp 11, 31, 59 +; CHECK-NEXT: xvmaddadp 10, 29, 59 +; CHECK-NEXT: xvmaddadp 9, 27, 59 +; CHECK-NEXT: xvmaddadp 8, 56, 58 +; CHECK-NEXT: xvmaddadp 7, 54, 58 +; CHECK-NEXT: xvmaddadp 6, 52, 58 ; CHECK-NEXT: xvmaddadp 41, 30, 58 ; CHECK-NEXT: xvmaddadp 43, 28, 58 ; CHECK-NEXT: xvmaddadp 42, 26, 58 ; CHECK-NEXT: addi 6, 6, 64 ; CHECK-NEXT: addi 5, 5, 64 +; CHECK-NEXT: addi 30, 30, 64 +; CHECK-NEXT: addi 29, 29, 64 +; CHECK-NEXT: addi 23, 23, 64 ; CHECK-NEXT: addi 24, 24, 64 ; CHECK-NEXT: addi 25, 25, 64 -; CHECK-NEXT: addi 29, 29, 64 -; CHECK-NEXT: addi 30, 30, 64 -; CHECK-NEXT: addi 2, 2, 64 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # ; CHECK-NEXT: addi 28, 28, 6 -; CHECK-NEXT: add 26, 26, 12 -; CHECK-NEXT: add 31, 31, 12 -; CHECK-NEXT: add 19, 19, 12 -; CHECK-NEXT: add 3, 3, 12 -; CHECK-NEXT: add 20, 20, 12 -; CHECK-NEXT: add 21, 21, 12 +; CHECK-NEXT: add 26, 26, 2 +; CHECK-NEXT: add 20, 20, 2 +; CHECK-NEXT: add 11, 11, 2 +; CHECK-NEXT: add 19, 19, 2 +; CHECK-NEXT: add 3, 3, 2 +; CHECK-NEXT: add 12, 12, 2 ; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpld 28, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit -; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) +; CHECK-NEXT: stxv 1, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload ; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload @@ -284,7 +282,7 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload ; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 0, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload ; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload @@ -297,8 +295,8 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. ; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) -; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload @@ -310,40 +308,41 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %. 
; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) -; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(10) -; CHECK-NEXT: stxv 36, 0(9) -; CHECK-NEXT: stxv 13, 0(8) +; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 2, 0(8) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: stxv 39, 0(10) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 36, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) +; CHECK-NEXT: stxv 13, 0(3) ; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 5, 0(3) +; CHECK-NEXT: stxv 12, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(3) +; CHECK-NEXT: stxv 11, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 3, 0(3) +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(3) +; CHECK-NEXT: stxv 9, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: stxv 8, 0(3) ; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: stxv 7, 0(3) ; CHECK-NEXT: ld 3, 
176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 40, 0(3) +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 41, 0(3) ; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll index 799ba63..8fb4c21 100644 --- a/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll +++ b/llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll @@ -40,9 +40,10 @@ define signext i32 @test(ptr noalias %PtrA, ptr noalias %PtrB, i32 signext %LenA ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_4: # %if.end9 ; CHECK-NEXT: # -; CHECK-NEXT: lwzx 10, 6, 9 +; CHECK-NEXT: add 9, 3, 9 +; CHECK-NEXT: lwz 10, 4(9) ; CHECK-NEXT: addi 10, 10, 1 -; CHECK-NEXT: stwx 10, 6, 9 +; CHECK-NEXT: stw 10, 4(9) ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_5: # %if.then ; CHECK-NEXT: lwax 3, 9, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll new file mode 100644 index 0000000..dd63fa0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ssegN-load.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple riscv64 -mattr=+zve64x,+zvl128b < %s | FileCheck %s + +define {<8 x i8>, <8 x i8>} @load_factor2(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg2e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.sseg2.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret {<8 x i8>, <8 x i8>} %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>} @load_factor3(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg3e8.v v8, (a0), a1 +; CHECK-NEXT: 
ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg3.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8> } %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor4(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor4: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg4e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg4.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor5(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor5: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg5e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg5.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor6(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor6: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg6e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg6.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor7(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor7: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg7e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x 
i8> } @llvm.riscv.sseg7.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 +} + +define {<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>} @load_factor8(ptr %ptr, i64 %stride) { +; CHECK-LABEL: load_factor8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlsseg8e8.v v8, (a0), a1 +; CHECK-NEXT: ret + %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.sseg8.load.mask.v8i8.i64.i64(ptr %ptr, i64 %stride, <8 x i1> splat (i1 true), i64 8) + ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index 3dc83d5..38d38f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -1636,3 +1636,49 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b, %res = call <8 x half> @llvm.vector.interleave8.v8f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g, <1 x half> %h) ret <8 x half> %res } + +define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) { +; CHECK-LABEL: interleave4_const_splat_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_const_splat_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 3 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave4_const_splat_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: vmv.v.i v8, 3 +; ZIP-NEXT: ret + %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x 
i16> splat(i16 3), <2 x i16> splat(i16 3)) + ret <8 x i16> %retval +} + +define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) { +; CHECK-LABEL: interleave4_same_nonconst_splat_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_same_nonconst_splat_v8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a0 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave4_same_nonconst_splat_v8i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZIP-NEXT: vmv.v.x v8, a0 +; ZIP-NEXT: ret + %ins = insertelement <2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer + %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat) + ret <8 x i16> %retval +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index 01cc5c5..ee38257 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -14947,3 +14947,147 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv2f64(<vscale x 2 x %res = call <vscale x 16 x double> @llvm.vector.interleave8.nxv16f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5, <vscale x 2 x double> %v6, <vscale x 2 x double> %v7) ret <vscale x 16 x double> %res } + +define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() { +; CHECK-LABEL: interleave2_same_const_splat_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave2_same_const_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVBB-NEXT: 
vmv.v.i v8, 3 +; ZVBB-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() { +; V-LABEL: interleave2_diff_const_splat_nxv4i16: +; V: # %bb.0: +; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; V-NEXT: vmv.v.i v9, 3 +; V-NEXT: li a0, 4 +; V-NEXT: vmv.v.i v10, -1 +; V-NEXT: vwaddu.vx v8, v9, a0 +; V-NEXT: vwmaccu.vx v8, a0, v10 +; V-NEXT: csrr a0, vlenb +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v9, v8, a0 +; V-NEXT: vslideup.vx v8, v9, a0 +; V-NEXT: ret +; +; ZVBB-LABEL: interleave2_diff_const_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 4 +; ZVBB-NEXT: li a0, 3 +; ZVBB-NEXT: vwsll.vi v9, v8, 16 +; ZVBB-NEXT: vwaddu.wx v8, v9, a0 +; ZVBB-NEXT: csrr a0, vlenb +; ZVBB-NEXT: srli a0, a0, 2 +; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVBB-NEXT: vslidedown.vx v9, v8, a0 +; ZVBB-NEXT: vslideup.vx v8, v9, a0 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave2_diff_const_splat_nxv4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; ZIP-NEXT: vmv.v.i v9, 4 +; ZIP-NEXT: vmv.v.i v10, 3 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: ri.vzip2b.vv v11, v10, v9 +; ZIP-NEXT: ri.vzip2a.vv v8, v10, v9 +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZIP-NEXT: vslideup.vx v8, v11, a0 +; ZIP-NEXT: ret + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.v4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4)) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) { +; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: 
interleave2_same_nonconst_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a0 +; ZVBB-NEXT: ret + %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) { +; V-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; V: # %bb.0: +; V-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; V-NEXT: vmv.v.x v9, a0 +; V-NEXT: vmv.v.i v10, -1 +; V-NEXT: csrr a0, vlenb +; V-NEXT: vwaddu.vx v8, v9, a1 +; V-NEXT: vwmaccu.vx v8, a1, v10 +; V-NEXT: srli a0, a0, 2 +; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; V-NEXT: vslidedown.vx v9, v8, a0 +; V-NEXT: vslideup.vx v8, v9, a0 +; V-NEXT: ret +; +; ZVBB-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZVBB-NEXT: vmv.v.x v8, a1 +; ZVBB-NEXT: csrr a1, vlenb +; ZVBB-NEXT: vwsll.vi v9, v8, 16 +; ZVBB-NEXT: vwaddu.wx v8, v9, a0 +; ZVBB-NEXT: srli a1, a1, 2 +; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVBB-NEXT: vslidedown.vx v9, v8, a1 +; ZVBB-NEXT: vslideup.vx v8, v9, a1 +; ZVBB-NEXT: ret +; +; ZIP-LABEL: interleave2_diff_nonconst_splat_nxv4i16: +; ZIP: # %bb.0: +; ZIP-NEXT: vsetvli a2, zero, e16, mf2, ta, ma +; ZIP-NEXT: vmv.v.x v9, a0 +; ZIP-NEXT: vmv.v.x v10, a1 +; ZIP-NEXT: csrr a0, vlenb +; ZIP-NEXT: ri.vzip2b.vv v11, v9, v10 +; ZIP-NEXT: ri.vzip2a.vv v8, v9, v10 +; ZIP-NEXT: srli a0, a0, 2 +; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma +; ZIP-NEXT: vslideup.vx v8, v11, a0 +; ZIP-NEXT: ret + %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0 + %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> 
zeroinitializer + %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0 + %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer + %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2) + ret <vscale x 4 x i16> %retval +} + +define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() { +; CHECK-LABEL: interleave4_same_const_splat_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 3 +; CHECK-NEXT: ret +; +; ZVBB-LABEL: interleave4_same_const_splat_nxv8i16: +; ZVBB: # %bb.0: +; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; ZVBB-NEXT: vmv.v.i v8, 3 +; ZVBB-NEXT: ret + %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3)) + ret <vscale x 8 x i16> %retval +} diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll index 09b065a..048ce96 100644 --- a/llvm/test/CodeGen/RISCV/zilsd.ll +++ b/llvm/test/CodeGen/RISCV/zilsd.ll @@ -117,3 +117,22 @@ entyr: store i64 0, ptr @g ret void } + +define void @large_offset(ptr nocapture %p, i64 %d) nounwind { +; CHECK-LABEL: large_offset: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a1, 4 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: ld a2, -384(a0) +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: seqz a1, a2 +; CHECK-NEXT: add a3, a3, a1 +; CHECK-NEXT: sd a2, -384(a0) +; CHECK-NEXT: ret +entry: + %add.ptr = getelementptr inbounds i64, ptr %p, i64 2000 + %a = load i64, ptr %add.ptr, align 8 + %b = add i64 %a, 1 + store i64 %b, ptr %add.ptr, align 8 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll new file mode 100644 
index 0000000..fa708ab --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll @@ -0,0 +1,12 @@ +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: result and argument must have the same number of components + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define spir_func void @test(<8 x float> %in) { + %res = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in) + ret void +} + +declare spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll new file mode 100644 index 0000000..630b2fd --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll @@ -0,0 +1,12 @@ +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: result and argument must have the same number of components + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + +define spir_func void @test(<8 x float> %in) { + %res = tail call spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in) + ret void +} + +declare spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>) diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll new file mode 
100644 index 0000000..dcad78d --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll @@ -0,0 +1,62 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - -filetype=obj | spirv-val %} + +; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; CHECK-ERROR: the builtin requires the following SPIR-V extension: SPV_INTEL_tensor_float32_conversion + +; CHECK: OpCapability TensorFloat32RoundingINTEL +; CHECK: OpExtension "SPV_INTEL_tensor_float32_conversion" + +; CHECK-DAG: %[[VoidTy:.*]] = OpTypeVoid +; CHECK-DAG: %[[FP32Ty:.*]] = OpTypeFloat 32 +; CHECK-DAG: %[[VecFloat2:.*]] = OpTypeVector %[[FP32Ty]] 2 +; CHECK-DAG: %[[VecFloat3:.*]] = OpTypeVector %[[FP32Ty]] 3 +; CHECK-DAG: %[[VecFloat4:.*]] = OpTypeVector %[[FP32Ty]] 4 +; CHECK-DAG: %[[VecFloat8:.*]] = OpTypeVector %[[FP32Ty]] 8 +; CHECK-DAG: %[[VecFloat16:.*]] = OpTypeVector %[[FP32Ty]] 16 +; CHECK-DAG: %[[FloatConstId:.*]] = OpConstant %[[FP32Ty]] 1.5 + +; CHECK: OpFunction %[[VoidTy]] +; CHECK: %[[FP32ValId:.*]] = OpFunctionParameter %[[FP32Ty]] +; CHECK: %[[FP32v8ValId:.*]] = OpFunctionParameter %[[VecFloat8]] +; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FP32ValId]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]] %[[FP32v8ValId]] +; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FloatConstId]] + +; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat2]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat3]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat4]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]] +; CHECK: OpRoundFToTF32INTEL %[[VecFloat16]] + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" +target triple = "spir64-unknown-unknown" + 
+define spir_func void @test(float %a, <8 x float> %in) { + %res1 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float %a) + %res2 = tail call spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in) + %res3 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float 1.500000e+00) + ret void +} + +declare spir_func float @_Z25__spirv_RoundFToTF32INTELf(float) +declare spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>) + +define dso_local spir_kernel void @test_ocl(float %a) { +entry: + %res4 = call spir_func float @_Z35intel_round_as_tensor_float32_floatt(float 0.000000e+00) + %res5 = call spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float> zeroinitializer) + %res6 = call spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float> zeroinitializer) + %res7 = call spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float> zeroinitializer) + %res8 = call spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float> zeroinitializer) + %res9 = call spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float> zeroinitializer) + ret void +} + +declare spir_func float @_Z35intel_round_as_tensor_float32_floatt(float) +declare spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float>) +declare spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float>) +declare spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float>) +declare spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float>) +declare spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>) diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll index 085f8b3..9d07b63 100644 --- a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll +++ 
b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/lifetime.ll @@ -33,7 +33,7 @@ define spir_func void @foo(ptr noundef byval(%tprange) align 8 %_arg_UserRange) %RoundedRangeKernel = alloca %tprange, align 8 call void @llvm.lifetime.start.p0(i64 72, ptr nonnull %RoundedRangeKernel) call void @llvm.memcpy.p0.p0.i64(ptr align 8 %RoundedRangeKernel, ptr align 8 %_arg_UserRange, i64 16, i1 false) - %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 16 + %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8 call void @llvm.lifetime.end.p0(i64 72, ptr nonnull %RoundedRangeKernel) ret void } @@ -55,7 +55,7 @@ define spir_func void @bar(ptr noundef byval(%tprange) align 8 %_arg_UserRange) %RoundedRangeKernel = alloca %tprange, align 8 call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %RoundedRangeKernel) call void @llvm.memcpy.p0.p0.i64(ptr align 8 %RoundedRangeKernel, ptr align 8 %_arg_UserRange, i64 16, i1 false) - %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 16 + %KernelFunc = getelementptr inbounds i8, ptr %RoundedRangeKernel, i64 8 call void @llvm.lifetime.end.p0(i64 -1, ptr nonnull %RoundedRangeKernel) ret void } diff --git a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll index a1ff1e0..66337b1 100644 --- a/llvm/test/CodeGen/SPIRV/logical-struct-access.ll +++ b/llvm/test/CodeGen/SPIRV/logical-struct-access.ll @@ -1,4 +1,5 @@ -; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -print-after-all | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: [[uint:%[0-9]+]] = OpTypeInt 32 0 diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll new file mode 100644 index 0000000..26dc60e 
--- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-1.ll @@ -0,0 +1,46 @@ +; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +%struct.S1 = type { <4 x i32>, [10 x <4 x float>], <4 x float> } +%struct.S2 = type { <4 x float>, <4 x i32> } + +@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1 + +define <4 x float> @main() { +entry: + %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str) + %3 = tail call noundef align 1 dereferenceable(192) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 0) + +; CHECK-DAG: %[[#ulong:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#ulong_1:]] = OpConstant %[[#ulong]] 1 +; CHECK-DAG: %[[#ulong_3:]] = OpConstant %[[#ulong]] 3 + +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#uint_0:]] = OpConstant %[[#uint]] 0 +; CHECK-DAG: %[[#uint_10:]] = OpConstant %[[#uint]] 10 + +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#v4f:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#arr_v4f:]] = OpTypeArray %[[#v4f]] %[[#uint_10]] +; CHECK-DAG: %[[#S1:]] = OpTypeStruct %[[#]] %[[#arr_v4f]] %[[#]] +; CHECK-DAG: %[[#sb_S1:]] = OpTypePointer StorageBuffer %[[#S1]] +; CHECK-DAG: %[[#sb_v4f:]] = OpTypePointer StorageBuffer %[[#v4f]] + +; CHECK: %[[#tmp:]] = OpAccessChain %[[#sb_S1]] %[[#]] %[[#uint_0]] %[[#uint_0]] +; CHECK: %[[#ptr:]] = OpInBoundsAccessChain %[[#sb_v4f]] %[[#tmp]] %[[#ulong_1]] %[[#ulong_3]] +; This rewritten GEP combined all constant indices into a single value. +; We should make sure the correct indices are retrieved. 
+ %arrayidx.i = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 64 + +; CHECK: OpLoad %[[#v4f]] %[[#ptr]] + %4 = load <4 x float>, ptr addrspace(11) %arrayidx.i, align 1 + + ret <4 x float> %4 +} + +declare i32 @llvm.spv.flattened.thread.id.in.group() +declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr) +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll new file mode 100644 index 0000000..a6efb38 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access-constant-index-2.ll @@ -0,0 +1,54 @@ +; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +%struct.S1 = type { <4 x i32>, [10 x <4 x float>], <4 x float> } +%struct.S2 = type { <4 x float>, <4 x i32> } + +@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1 + +define <4 x float> @main(i32 %index) { +entry: + %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str) + %3 = tail call noundef align 1 dereferenceable(192) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 0) + +; CHECK-DAG: %[[#ulong:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#ulong_1:]] = OpConstant %[[#ulong]] 1 + +; CHECK-DAG: %[[#uint:]] 
= OpTypeInt 32 0 +; CHECK-DAG: %[[#uint_0:]] = OpConstant %[[#uint]] 0 +; CHECK-DAG: %[[#uint_10:]] = OpConstant %[[#uint]] 10 +; CHECK-DAG: %[[#uint_16:]] = OpConstant %[[#uint]] 16 + +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#v4f:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#arr_v4f:]] = OpTypeArray %[[#v4f]] %[[#uint_10]] +; CHECK-DAG: %[[#S1:]] = OpTypeStruct %[[#]] %[[#arr_v4f]] %[[#]] +; CHECK-DAG: %[[#sb_S1:]] = OpTypePointer StorageBuffer %[[#S1]] +; CHECK-DAG: %[[#sb_arr_v4f:]] = OpTypePointer StorageBuffer %[[#arr_v4f]] +; CHECK-DAG: %[[#sb_v4f:]] = OpTypePointer StorageBuffer %[[#v4f]] + +; CHECK: %[[#a:]] = OpAccessChain %[[#sb_S1]] %[[#]] %[[#uint_0]] %[[#uint_0]] +; CHECK: %[[#b:]] = OpInBoundsAccessChain %[[#sb_arr_v4f]] %[[#a]] %[[#ulong_1]] + %4 = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 16 + +; CHECK: %[[#offset:]] = OpIMul %[[#]] %[[#]] %[[#uint_16]] +; Offset is computed in bytes. Make sure we reconvert it back to an index. + %offset = mul i32 %index, 16 + +; CHECK: %[[#index:]] = OpUDiv %[[#]] %[[#offset]] %[[#uint_16]] +; CHECK: %[[#c:]] = OpInBoundsAccessChain %[[#sb_v4f]] %[[#b]] %[[#index]] + %5 = getelementptr inbounds nuw i8, ptr addrspace(11) %4, i32 %offset + +; CHECK: OpLoad %[[#v4f]] %[[#c]] + %6 = load <4 x float>, ptr addrspace(11) %5, align 1 + + ret <4 x float> %6 +} + +declare i32 @llvm.spv.flattened.thread.id.in.group() +declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr) +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } + + diff --git a/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll new file mode 100644 index 
0000000..8e6b5a6 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/structured-buffer-access.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O3 -mtriple=spirv1.6-unknown-vulkan1.3-compute %s -o - -filetype=obj | spirv-val %} + +; struct S1 { +; int4 i; +; float4 f; +; }; +; struct S2 { +; float4 f; +; int4 i; +; }; +; +; StructuredBuffer<S1> In : register(t1); +; RWStructuredBuffer<S2> Out : register(u0); +; +; [numthreads(1,1,1)] +; void main(uint GI : SV_GroupIndex) { +; Out[GI].f = In[GI].f; +; Out[GI].i = In[GI].i; +; } + +%struct.S1 = type { <4 x i32>, <4 x float> } +%struct.S2 = type { <4 x float>, <4 x i32> } + +@.str = private unnamed_addr constant [3 x i8] c"In\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"Out\00", align 1 + +define void @main() local_unnamed_addr #0 { +; CHECK-LABEL: main +; CHECK: %43 = OpFunction %2 None %3 ; -- Begin function main +; CHECK-NEXT: %1 = OpLabel +; CHECK-NEXT: %44 = OpVariable %28 Function %38 +; CHECK-NEXT: %45 = OpVariable %27 Function %39 +; CHECK-NEXT: %46 = OpCopyObject %19 %40 +; CHECK-NEXT: %47 = OpCopyObject %16 %41 +; CHECK-NEXT: %48 = OpLoad %4 %42 +; CHECK-NEXT: %49 = OpAccessChain %13 %46 %29 %48 +; CHECK-NEXT: %50 = OpInBoundsAccessChain %9 %49 %31 +; CHECK-NEXT: %51 = OpLoad %8 %50 Aligned 1 +; CHECK-NEXT: %52 = OpAccessChain %11 %47 %29 %48 +; CHECK-NEXT: %53 = OpInBoundsAccessChain %9 %52 %29 +; CHECK-NEXT: OpStore %53 %51 Aligned 1 +; CHECK-NEXT: %54 = OpAccessChain %6 %49 %29 +; CHECK-NEXT: %55 = OpLoad %5 %54 Aligned 1 +; CHECK-NEXT: %56 = OpInBoundsAccessChain %6 %52 %31 +; CHECK-NEXT: OpStore %56 %55 Aligned 1 +; CHECK-NEXT: OpReturn +; CHECK-NEXT: OpFunctionEnd +entry: + %0 = tail call target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) 
@llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32 0, i32 1, i32 1, i32 0, i1 false, ptr nonnull @.str) + %1 = tail call target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(i32 0, i32 0, i32 1, i32 0, i1 false, ptr nonnull @.str.2) + %2 = tail call i32 @llvm.spv.flattened.thread.id.in.group() + %3 = tail call noundef align 1 dereferenceable(32) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) %0, i32 %2) + %f.i = getelementptr inbounds nuw i8, ptr addrspace(11) %3, i64 16 + %4 = load <4 x float>, ptr addrspace(11) %f.i, align 1 + %5 = tail call noundef align 1 dereferenceable(32) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) %1, i32 %2) + store <4 x float> %4, ptr addrspace(11) %5, align 1 + %6 = load <4 x i32>, ptr addrspace(11) %3, align 1 + %i6.i = getelementptr inbounds nuw i8, ptr addrspace(11) %5, i64 16 + store <4 x i32> %6, ptr addrspace(11) %i6.i, align 1 + ret void +} + +declare i32 @llvm.spv.flattened.thread.id.in.group() + +declare target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(i32, i32, i32, i32, i1, ptr) + +declare target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(i32, i32, i32, i32, i1, ptr) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S2s_12_1t(target("spirv.VulkanBuffer", [0 x %struct.S2], 12, 1), i32) + +declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0s_struct.S1s_12_0t(target("spirv.VulkanBuffer", [0 x %struct.S1], 12, 0), i32) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" 
} diff --git a/llvm/test/DebugInfo/PDB/obj-globalhash.test b/llvm/test/DebugInfo/PDB/obj-globalhash.test index 116ea91..bff6826 100644 --- a/llvm/test/DebugInfo/PDB/obj-globalhash.test +++ b/llvm/test/DebugInfo/PDB/obj-globalhash.test @@ -1,15 +1,15 @@ -RUN: yaml2obj %p/Inputs/obj-hashes-1.yaml -o %T/obj-hashes-1.obj -RUN: yaml2obj %p/Inputs/obj-hashes-2.yaml -o %T/obj-hashes-2.obj -RUN: echo obj-hashes-1 > %T/hashes-combined.out -RUN: llvm-pdbutil dump -type-extras %T/obj-hashes-1.obj >> %T/hashes-combined.out -RUN: echo obj-hashes-2 >> %T/hashes-combined.out -RUN: llvm-pdbutil dump -type-extras %T/obj-hashes-2.obj >> %T/hashes-combined.out -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-ONE %s -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-TWO %s -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-THREE %s -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-FOUR %s -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-FIVE %s -RUN: cat %T/hashes-combined.out | FileCheck --check-prefix=CHECK-SIX %s +RUN: yaml2obj %p/Inputs/obj-hashes-1.yaml -o %t.obj-hashes-1.obj +RUN: yaml2obj %p/Inputs/obj-hashes-2.yaml -o %t.obj-hashes-2.obj +RUN: echo obj-hashes-1 > %t.hashes-combined.out +RUN: llvm-pdbutil dump -type-extras %t.obj-hashes-1.obj >> %t.hashes-combined.out +RUN: echo obj-hashes-2 >> %t.hashes-combined.out +RUN: llvm-pdbutil dump -type-extras %t.obj-hashes-2.obj >> %t.hashes-combined.out +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-ONE %s +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-TWO %s +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-THREE %s +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-FOUR %s +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-FIVE %s +RUN: cat %t.hashes-combined.out | FileCheck --check-prefix=CHECK-SIX %s ; char**. 
Both the local and global hashes should be the same, since the only ; back-references are for simple types which have fixed indices. diff --git a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test index b97f8ab..5f930ad 100644 --- a/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test +++ b/llvm/test/Examples/OrcV2Examples/lljit-with-thinlto-summaries.test @@ -1,15 +1,15 @@ -# RUN: opt -module-summary %p/Inputs/main-mod.ll -o %T/main-mod.bc -# RUN: opt -module-summary %p/Inputs/foo-mod.ll -o %T/foo-mod.bc -# RUN: opt -module-summary %p/Inputs/bar-mod.ll -o %T/bar-mod.bc +# RUN: opt -module-summary %p/Inputs/main-mod.ll -o %t.main-mod.bc +# RUN: opt -module-summary %p/Inputs/foo-mod.ll -o %t.foo-mod.bc +# RUN: opt -module-summary %p/Inputs/bar-mod.ll -o %t.bar-mod.bc # REQUIRES: default_triple # UNSUPPORTED: target=powerpc64{{.*}} -# RUN: llvm-lto -thinlto -o %T/main-foo-bar %T/main-mod.bc %T/foo-mod.bc %T/bar-mod.bc +# RUN: llvm-lto -thinlto -o %t.main-foo-bar %t.main-mod.bc %t.foo-mod.bc %t.bar-mod.bc -# RUN: LLJITWithThinLTOSummaries %T/main-foo-bar.thinlto.bc 2>&1 | FileCheck %s +# RUN: LLJITWithThinLTOSummaries %t.main-foo-bar.thinlto.bc 2>&1 | FileCheck %s -# CHECK: About to load module: {{.*}}/main-mod.bc -# CHECK-DAG: About to load module: {{.*}}/foo-mod.bc -# CHECK-DAG: About to load module: {{.*}}/bar-mod.bc +# CHECK: About to load module: {{.*}}main-mod.bc +# CHECK-DAG: About to load module: {{.*}}foo-mod.bc +# CHECK-DAG: About to load module: {{.*}}bar-mod.bc # CHECK: 'main' finished with exit code: 0 diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s index cf87861..6578539 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/MachO_foo-in-weak-dylib.s @@ -1,8 +1,8 @@ -# RUN: yaml2obj -o 
%T/libfoo.dylib %S/Inputs/libFooUniversalDylib.yaml +# RUN: yaml2obj -o %t.libfoo.dylib %S/Inputs/libFooUniversalDylib.yaml # RUN: llvm-mc -triple=x86_64-apple-macosx10.9 -filetype=obj \ -# RUN: -o %T/MachO_foo-in-weak-dylib.o %s -# RUN: llvm-jitlink -noexec %T/MachO_foo-in-weak-dylib.o \ -# RUN: -weak_library %T/libfoo.dylib +# RUN: -o %t.MachO_foo-in-weak-dylib.o %s +# RUN: llvm-jitlink -noexec %t.MachO_foo-in-weak-dylib.o \ +# RUN: -weak_library %t.libfoo.dylib # # Check that -weak_library supports universal binaries. diff --git a/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s b/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s index 8259484..7bb22ef 100644 --- a/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s +++ b/llvm/test/MC/AArch64/ELF_ARM64_large-relocations.s @@ -1,7 +1,8 @@ -# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s -# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o -# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s -# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o +# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o large-reloc.o %s +# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s large-reloc.o +# RUN: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o large-reloc.o %s +# RUN: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s large-reloc.o .text .globl g diff 
--git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s index 1f40a32..9a63afc 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3-fake16.s @@ -766,3 +766,177 @@ v_cvt_scale_pk8_f32_fp4 v[10:17], v20, 0xcf00 v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 // GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: [0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00] + +v_permlane_bcast_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_bcast_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_bcast_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_bcast_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_bcast_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_down_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_down_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01] + 
+v_permlane_down_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_down_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_down_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_down_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_down_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_down_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_down_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_up_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_up_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_up_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_up_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_up_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_up_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_up_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_up_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_up_b32 v5, v1, exec_lo, src_scc 
+// GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_xor_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_xor_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_xor_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_xor_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_xor_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_idx_gen_b32 v5, v1, s2 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, s105 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, ttmp15 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, vcc_hi +// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, vcc_lo +// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: 
[0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, m0 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, exec_hi +// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, exec_lo +// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00] + +v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp8_f32 
v[10:11], v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s index 03f642d..7f11859 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3.s @@ -766,3 
+766,177 @@ v_cvt_scale_pk8_f32_fp4 v[10:17], v20, 0xcf00 v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 // GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: [0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00] + +v_permlane_bcast_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_bcast_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_bcast_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_bcast_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_bcast_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_down_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_down_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_down_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_down_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_down_b32 v5, v1, 
vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_down_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_down_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_down_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_down_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_up_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_up_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_up_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_up_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_up_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_up_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_up_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_up_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_up_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_xor_b32 v5, v1, s2, s3 +// GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: 
[0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00] + +v_permlane_xor_b32 v5, v1, s105, s105 +// GFX1250: v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01] + +v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 +// GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01] + +v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo +// GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01] + +v_permlane_xor_b32 v5, v1, vcc_lo, m0 +// GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01] + +v_permlane_xor_b32 v5, v1, m0, vcc_hi +// GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01] + +v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo +// GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01] + +v_permlane_xor_b32 v5, v1, exec_lo, src_scc +// GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03] + +v_permlane_idx_gen_b32 v5, v1, s2 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, s105 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, ttmp15 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, vcc_hi +// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, vcc_lo +// GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, m0 +// GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, 
exec_hi +// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00] + +v_permlane_idx_gen_b32 v5, v1, exec_lo +// GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00] + +v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 100.0 +// GFX1250: 
v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 100.0 +// GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 +// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00] + +v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 100.0 +// GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt index ce8cfcb..53b7958 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3.txt @@ -820,3 +820,174 @@ 0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00 # GFX1250: v_cvt_scale_pk8_f32_fp4 v[10:17], v20, v8 scale_sel:1 ; encoding: 
[0x0a,0x08,0xa1,0xd6,0x14,0x11,0x02,0x00] + +0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xff,0xa8,0x01] + +0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03 +# GFX1250: v_permlane_bcast_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfd,0xf4,0x03] + +0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xfb,0xac,0x01] + +0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd3,0xa4,0x01] + +0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00 +# GFX1250: v_permlane_bcast_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0x05,0x0c,0x00] + +0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xf7,0xec,0x01] + +0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd7,0xf8,0x01] + +0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01 +# GFX1250: v_permlane_bcast_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x70,0xd6,0x01,0xd5,0xf4,0x01] + +0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xff,0xa8,0x01] + +0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03 +# GFX1250: v_permlane_down_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfd,0xf4,0x03] + +0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xfb,0xac,0x01] + +0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd3,0xa4,0x01] + +0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00 +# GFX1250: v_permlane_down_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0x05,0x0c,0x00] + 
+0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xf7,0xec,0x01] + +0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd7,0xf8,0x01] + +0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01 +# GFX1250: v_permlane_down_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x72,0xd6,0x01,0xd5,0xf4,0x01] + +0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03 +# GFX1250: v_permlane_up_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfd,0xf4,0x03] + +0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01 +# GFX1250: v_permlane_up_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xfb,0xac,0x01] + +0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01 +# GFX1250: v_permlane_up_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd3,0xa4,0x01] + +0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00 +# GFX1250: v_permlane_up_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0x05,0x0c,0x00] + +0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01 +# GFX1250: v_permlane_up_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xf7,0xec,0x01] + +0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01 +# GFX1250: v_permlane_up_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd7,0xf8,0x01] + +0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01 +# GFX1250: v_permlane_up_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x71,0xd6,0x01,0xd5,0xf4,0x01] + +0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01 +# GFX1250: v_permlane_xor_b32 v5, v1, exec_hi, vcc_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xff,0xa8,0x01] + +0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03 +# GFX1250: v_permlane_xor_b32 v5, v1, exec_lo, src_scc ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfd,0xf4,0x03] + +0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01 +# GFX1250: v_permlane_xor_b32 v5, v1, m0, vcc_hi ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xfb,0xac,0x01] + +0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01 +# GFX1250: 
v_permlane_xor_b32 v5, v1, s105, s105 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd3,0xa4,0x01] + +0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00 +# GFX1250: v_permlane_xor_b32 v5, v1, s2, s3 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0x05,0x0c,0x00] + +0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01 +# GFX1250: v_permlane_xor_b32 v5, v1, ttmp15, ttmp15 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xf7,0xec,0x01] + +0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01 +# GFX1250: v_permlane_xor_b32 v5, v1, vcc_hi, exec_lo ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd7,0xf8,0x01] + +0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01 +# GFX1250: v_permlane_xor_b32 v5, v1, vcc_lo, m0 ; encoding: [0x05,0x00,0x73,0xd6,0x01,0xd5,0xf4,0x01] + +0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xff,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, exec_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfd,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, m0 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xfb,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, s105 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd3,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, s2 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0x05,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, ttmp15 ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xf7,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd7,0x00,0x00] + +0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00 +# GFX1250: v_permlane_idx_gen_b32 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x14,0xd7,0x01,0xd5,0x00,0x00] + +0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: 
[0x0a,0x00,0xb4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb4,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_bf8_bf16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xb5,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc4,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_bf8_f16 v[10:11], v[20:23], v8 ; encoding: [0x0a,0x00,0xc6,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp8_f32 v[10:11], v[20:27], v8 ; encoding: [0x0a,0x00,0xc3,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_bf8_f32 v[10:11], v[20:27], v8 
; encoding: [0x0a,0x00,0xc5,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], 0x42c80000 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp4_f32 v10, v[20:27], v8 ; encoding: [0x0a,0x00,0xb0,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp4_f16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb3,0xd6,0x14,0x11,0x02,0x00] + +0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42 +# GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], 0x42c80000 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0xff,0x01,0x00,0x00,0x00,0xc8,0x42] + +0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00 +# GFX1250: v_cvt_scalef32_pk8_fp4_bf16 v10, v[20:23], v8 ; encoding: [0x0a,0x00,0xb8,0xd6,0x14,0x11,0x02,0x00] diff --git a/llvm/test/Object/archive-darwin-duplicates.test b/llvm/test/Object/archive-darwin-duplicates.test index 8642a31..f6aafcb 100644 --- a/llvm/test/Object/archive-darwin-duplicates.test +++ b/llvm/test/Object/archive-darwin-duplicates.test @@ -4,13 +4,13 @@ The two members with the duplicated name "test.o" must have unique non-zero timestamps, while baz.o, being unique, remains a zero timestamp. 
-RUN: mkdir -p %T/sub1 -RUN: printf test > %T/test.o -RUN: printf sub1/test > %T/sub1/test.o -RUN: printf baz > %T/baz.o +RUN: mkdir -p %t.dir/sub1 +RUN: printf test > %t.dir/test.o +RUN: printf sub1/test > %t.dir/sub1/test.o +RUN: printf baz > %t.dir/baz.o RUN: rm -f %t.a -RUN: llvm-ar --format=darwin rcs %t.a %T/sub1/test.o %T/test.o %T/baz.o +RUN: llvm-ar --format=darwin rcs %t.a %t.dir/sub1/test.o %t.dir/test.o %t.dir/baz.o RUN: FileCheck -strict-whitespace %s < %t.a CHECK:#1/12 1 0 0 644 28 ` diff --git a/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td b/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td index fa3484e..93525f7 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td +++ b/llvm/test/TableGen/GlobalISelEmitter/ContextlessPredicates.td @@ -1,7 +1,7 @@ -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/context-non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK_NOPT -input-file=%T/context-non-optimized.cpp -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %T/context-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK_OPT -input-file=%T/context-optimized.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.context-non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK_NOPT -input-file=%t.context-non-optimized.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %t.context-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK_OPT -input-file=%t.context-optimized.cpp diff --git a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td index c3895b5..4a516c6 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td +++ 
b/llvm/test/TableGen/GlobalISelEmitter/GlobalISelEmitter.td @@ -1,26 +1,26 @@ -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/non-optimized.cpp -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %T/optimized.cpp -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common %s -o %T/default.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.non-optimized.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=true %s -o %t.optimized.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common %s -o %t.default.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19N -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19O -input-file=%T/optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19N -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R19C,R19O -input-file=%t.optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21N -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21O -input-file=%T/optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21N -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R21C,R21O -input-file=%t.optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20N -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20O -input-file=%T/optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20N -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R20C,R20O -input-file=%t.optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00N -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00O -input-file=%T/optimized.cpp +// 
RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00N -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R00C,R00O -input-file=%t.optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01N -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01O -input-file=%T/optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01N -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R01C,R01O -input-file=%t.optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02N,NOOPT -input-file=%T/non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02O -input-file=%T/optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02N,NOOPT -input-file=%t.non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK,R02C,R02O -input-file=%t.optimized.cpp -// RUN: diff %T/default.cpp %T/optimized.cpp +// RUN: diff %t.default.cpp %t.optimized.cpp include "llvm/Target/Target.td" include "GlobalISelEmitterCommon.td" diff --git a/llvm/test/TableGen/GlobalISelEmitter/HwModes.td b/llvm/test/TableGen/GlobalISelEmitter/HwModes.td index f112577..04f6872 100644 --- a/llvm/test/TableGen/GlobalISelEmitter/HwModes.td +++ b/llvm/test/TableGen/GlobalISelEmitter/HwModes.td @@ -1,5 +1,5 @@ -// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %T/hwmode-non-optimized.cpp -// RUN: FileCheck %s --check-prefixes=CHECK -input-file=%T/hwmode-non-optimized.cpp +// RUN: llvm-tblgen -gen-global-isel -I %p/../../../include -I %p/../Common -optimize-match-table=false %s -o %t.hwmode-non-optimized.cpp +// RUN: FileCheck %s --check-prefixes=CHECK -input-file=%t.hwmode-non-optimized.cpp include "llvm/Target/Target.td" diff --git a/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll b/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll index be3663c..b18ad5d 100644 --- a/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll +++ 
b/llvm/test/Transforms/GVN/PRE/pre-after-rle.ll @@ -1,30 +1,52 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes='require<phi-values>,gvn' -S < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='require<phi-values>,gvn' -S < %s | FileCheck %s --check-prefixes=CHECK-MEMDEP +; RUN: opt -passes='require<phi-values>,gvn<memoryssa>' -S < %s | FileCheck %s --check-prefixes=CHECK-MEMSSA declare noalias ptr @malloc(i64) ; Detecting that %s is fully redundant should let us detect that %w is partially ; redundant. define void @fn1(ptr noalias %start, ptr %width, i32 %h) { -; CHECK-LABEL: @fn1( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) -; CHECK-NEXT: store ptr [[CALL]], ptr [[START:%.*]], align 8 -; CHECK-NEXT: br label [[PREHEADER:%.*]] -; CHECK: preheader: -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[PREHEADER_BODY_CRIT_EDGE:%.*]], label [[EXIT:%.*]] -; CHECK: preheader.body_crit_edge: -; CHECK-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH:%.*]], align 8 -; CHECK-NEXT: br label [[BODY:%.*]] -; CHECK: body: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], [[BODY]] ] -; CHECK-NEXT: store i32 0, ptr [[CALL]], align 4 -; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]] -; CHECK-NEXT: br i1 [[CMP3]], label [[BODY]], label [[PREHEADER]] -; CHECK: exit: -; CHECK-NEXT: ret void +; CHECK-MEMDEP-LABEL: define void @fn1( +; CHECK-MEMDEP-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]]) { +; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] +; CHECK-MEMDEP-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) +; CHECK-MEMDEP-NEXT: store ptr [[CALL]], ptr [[START]], align 8 +; CHECK-MEMDEP-NEXT: br label %[[PREHEADER:.*]] +; CHECK-MEMDEP: 
[[PREHEADER]]: +; CHECK-MEMDEP-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]] +; CHECK-MEMDEP-NEXT: br i1 [[CMP]], label %[[PREHEADER_BODY_CRIT_EDGE:.*]], label %[[EXIT:.*]] +; CHECK-MEMDEP: [[PREHEADER_BODY_CRIT_EDGE]]: +; CHECK-MEMDEP-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH]], align 8 +; CHECK-MEMDEP-NEXT: br label %[[BODY:.*]] +; CHECK-MEMDEP: [[BODY]]: +; CHECK-MEMDEP-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], %[[BODY]] ] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[CALL]], align 4 +; CHECK-MEMDEP-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 +; CHECK-MEMDEP-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]] +; CHECK-MEMDEP-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]] +; CHECK-MEMDEP: [[EXIT]]: +; CHECK-MEMDEP-NEXT: ret void +; +; CHECK-MEMSSA-LABEL: define void @fn1( +; CHECK-MEMSSA-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]]) { +; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] +; CHECK-MEMSSA-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) +; CHECK-MEMSSA-NEXT: store ptr [[CALL]], ptr [[START]], align 8 +; CHECK-MEMSSA-NEXT: br label %[[PREHEADER:.*]] +; CHECK-MEMSSA: [[PREHEADER]]: +; CHECK-MEMSSA-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]] +; CHECK-MEMSSA-NEXT: br i1 [[CMP]], label %[[BODY:.*]], label %[[EXIT:.*]] +; CHECK-MEMSSA: [[BODY]]: +; CHECK-MEMSSA-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER]] ], [ [[J_NEXT:%.*]], %[[BODY]] ] +; CHECK-MEMSSA-NEXT: [[S:%.*]] = load ptr, ptr [[START]], align 8 +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[S]], align 4 +; CHECK-MEMSSA-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 +; CHECK-MEMSSA-NEXT: [[W:%.*]] = load i32, ptr [[WIDTH]], align 8 +; CHECK-MEMSSA-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W]] +; CHECK-MEMSSA-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]] +; CHECK-MEMSSA: [[EXIT]]: +; CHECK-MEMSSA-NEXT: ret void ; entry: %call = tail call noalias ptr @malloc(i64 1024) @@ -52,33 +74,61 @@ exit: ; %w is partially redundant 
requires alias analysis that can analyze those ; values. define void @fn2(ptr noalias %start, ptr %width, i32 %h, i32 %arg) { -; CHECK-LABEL: @fn2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) -; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF:%.*]], label [[ELSE:%.*]] -; CHECK: if: -; CHECK-NEXT: store ptr [[CALL]], ptr [[START:%.*]], align 8 -; CHECK-NEXT: br label [[PREHEADER:%.*]] -; CHECK: else: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]] -; CHECK-NEXT: store ptr [[GEP]], ptr [[START]], align 8 -; CHECK-NEXT: br label [[PREHEADER]] -; CHECK: preheader: -; CHECK-NEXT: [[S:%.*]] = phi ptr [ [[S]], [[BODY:%.*]] ], [ [[GEP]], [[ELSE]] ], [ [[CALL]], [[IF]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[PREHEADER_BODY_CRIT_EDGE:%.*]], label [[EXIT:%.*]] -; CHECK: preheader.body_crit_edge: -; CHECK-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH:%.*]], align 8 -; CHECK-NEXT: br label [[BODY]] -; CHECK: body: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], [[BODY]] ] -; CHECK-NEXT: store i32 0, ptr [[S]], align 4 -; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]] -; CHECK-NEXT: br i1 [[CMP3]], label [[BODY]], label [[PREHEADER]] -; CHECK: exit: -; CHECK-NEXT: ret void +; CHECK-MEMDEP-LABEL: define void @fn2( +; CHECK-MEMDEP-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]], i32 [[ARG:%.*]]) { +; CHECK-MEMDEP-NEXT: [[ENTRY:.*:]] +; CHECK-MEMDEP-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) +; CHECK-MEMDEP-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG]], 0 +; CHECK-MEMDEP-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK-MEMDEP: [[IF]]: +; CHECK-MEMDEP-NEXT: store ptr [[CALL]], ptr [[START]], align 8 +; CHECK-MEMDEP-NEXT: br label 
%[[PREHEADER:.*]] +; CHECK-MEMDEP: [[ELSE]]: +; CHECK-MEMDEP-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]] +; CHECK-MEMDEP-NEXT: store ptr [[GEP]], ptr [[START]], align 8 +; CHECK-MEMDEP-NEXT: br label %[[PREHEADER]] +; CHECK-MEMDEP: [[PREHEADER]]: +; CHECK-MEMDEP-NEXT: [[S:%.*]] = phi ptr [ [[S]], %[[BODY:.*]] ], [ [[GEP]], %[[ELSE]] ], [ [[CALL]], %[[IF]] ] +; CHECK-MEMDEP-NEXT: [[CMP:%.*]] = icmp slt i32 1, [[H]] +; CHECK-MEMDEP-NEXT: br i1 [[CMP]], label %[[PREHEADER_BODY_CRIT_EDGE:.*]], label %[[EXIT:.*]] +; CHECK-MEMDEP: [[PREHEADER_BODY_CRIT_EDGE]]: +; CHECK-MEMDEP-NEXT: [[W_PRE:%.*]] = load i32, ptr [[WIDTH]], align 8 +; CHECK-MEMDEP-NEXT: br label %[[BODY]] +; CHECK-MEMDEP: [[BODY]]: +; CHECK-MEMDEP-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER_BODY_CRIT_EDGE]] ], [ [[J_NEXT:%.*]], %[[BODY]] ] +; CHECK-MEMDEP-NEXT: store i32 0, ptr [[S]], align 4 +; CHECK-MEMDEP-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 +; CHECK-MEMDEP-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W_PRE]] +; CHECK-MEMDEP-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]] +; CHECK-MEMDEP: [[EXIT]]: +; CHECK-MEMDEP-NEXT: ret void +; +; CHECK-MEMSSA-LABEL: define void @fn2( +; CHECK-MEMSSA-SAME: ptr noalias [[START:%.*]], ptr [[WIDTH:%.*]], i32 [[H:%.*]], i32 [[ARG:%.*]]) { +; CHECK-MEMSSA-NEXT: [[ENTRY:.*:]] +; CHECK-MEMSSA-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 1024) +; CHECK-MEMSSA-NEXT: [[CMP1:%.*]] = icmp slt i32 [[ARG]], 0 +; CHECK-MEMSSA-NEXT: br i1 [[CMP1]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK-MEMSSA: [[IF]]: +; CHECK-MEMSSA-NEXT: store ptr [[CALL]], ptr [[START]], align 8 +; CHECK-MEMSSA-NEXT: br label %[[PREHEADER:.*]] +; CHECK-MEMSSA: [[ELSE]]: +; CHECK-MEMSSA-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i32 [[ARG]] +; CHECK-MEMSSA-NEXT: store ptr [[GEP]], ptr [[START]], align 8 +; CHECK-MEMSSA-NEXT: br label %[[PREHEADER]] +; CHECK-MEMSSA: [[PREHEADER]]: +; CHECK-MEMSSA-NEXT: [[CMP:%.*]] = icmp slt 
i32 1, [[H]] +; CHECK-MEMSSA-NEXT: br i1 [[CMP]], label %[[BODY:.*]], label %[[EXIT:.*]] +; CHECK-MEMSSA: [[BODY]]: +; CHECK-MEMSSA-NEXT: [[J:%.*]] = phi i32 [ 0, %[[PREHEADER]] ], [ [[J_NEXT:%.*]], %[[BODY]] ] +; CHECK-MEMSSA-NEXT: [[S:%.*]] = load ptr, ptr [[START]], align 8 +; CHECK-MEMSSA-NEXT: store i32 0, ptr [[S]], align 4 +; CHECK-MEMSSA-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 +; CHECK-MEMSSA-NEXT: [[W:%.*]] = load i32, ptr [[WIDTH]], align 8 +; CHECK-MEMSSA-NEXT: [[CMP3:%.*]] = icmp slt i32 [[J_NEXT]], [[W]] +; CHECK-MEMSSA-NEXT: br i1 [[CMP3]], label %[[BODY]], label %[[PREHEADER]] +; CHECK-MEMSSA: [[EXIT]]: +; CHECK-MEMSSA-NEXT: ret void ; entry: %call = tail call noalias ptr @malloc(i64 1024) diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll index c81c1fe..e495163 100644 --- a/llvm/test/Transforms/GVN/PRE/rle.ll +++ b/llvm/test/Transforms/GVN/PRE/rle.ll @@ -1,12 +1,32 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,LE -; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,LE,LE-MEMDEP +; RUN: opt < %s -data-layout="e-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -passes='gvn<memoryssa>',dce -enable-split-backedge-in-load-pre -S | FileCheck 
%s --check-prefixes=CHECK,LE,LE-MEMSSA +; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes=gvn,dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMDEP +; RUN: opt < %s -data-layout="E-p:32:32:32-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -passes='gvn<memoryssa>',dce -enable-split-backedge-in-load-pre -S | FileCheck %s --check-prefixes=CHECK,BE,BE-MEMSSA ;; Trivial RLE test. define i32 @test0(i32 %V, ptr %P) { -; CHECK-LABEL: @test0( -; CHECK-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: ret i32 [[V]] +; LE-MEMDEP-LABEL: define i32 @test0( +; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: ret i32 [[V]] +; +; LE-MEMSSA-LABEL: define i32 @test0( +; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret i32 [[A]] +; +; BE-MEMDEP-LABEL: define i32 @test0( +; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: ret i32 [[V]] +; +; BE-MEMSSA-LABEL: define i32 @test0( +; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret i32 [[A]] ; store i32 %V, ptr %P @@ -21,8 +41,9 @@ define i32 @test0(i32 %V, ptr %P) { ;; PR5016 define i8 @crash0({i32, i32} %A, ptr %P) { -; CHECK-LABEL: @crash0( -; CHECK-NEXT: store { i32, i32 } [[A:%.*]], ptr [[P:%.*]], align 4 +; CHECK-LABEL: define i8 @crash0( +; CHECK-SAME: { i32, i32 } [[A:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: store { i32, i32 } [[A]], ptr [[P]], align 4 ; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: ret i8 [[Y]] ; @@ -34,7 +55,7 @@ 
define i8 @crash0({i32, i32} %A, ptr %P) { ;; No PR filed, crashed in CaptureTracker. declare void @helper() define void @crash1() { -; CHECK-LABEL: @crash1( +; CHECK-LABEL: define void @crash1() { ; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr undef, ptr undef, i64 undef, i1 false) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: ret void ; @@ -52,10 +73,29 @@ define void @crash1() { ;; i32 -> f32 forwarding. define float @coerce_mustalias1(i32 %V, ptr %P) { -; CHECK-LABEL: @coerce_mustalias1( -; CHECK-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float -; CHECK-NEXT: ret float [[TMP1]] +; LE-MEMDEP-LABEL: define float @coerce_mustalias1( +; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float +; LE-MEMDEP-NEXT: ret float [[TMP1]] +; +; LE-MEMSSA-LABEL: define float @coerce_mustalias1( +; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret float [[A]] +; +; BE-MEMDEP-LABEL: define float @coerce_mustalias1( +; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[V]] to float +; BE-MEMDEP-NEXT: ret float [[TMP1]] +; +; BE-MEMSSA-LABEL: define float @coerce_mustalias1( +; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret float [[A]] ; store i32 %V, ptr %P @@ -66,11 +106,31 @@ define float @coerce_mustalias1(i32 %V, ptr %P) { ;; ptr -> float forwarding. 
define float @coerce_mustalias2(ptr %V, ptr %P) { -; CHECK-LABEL: @coerce_mustalias2( -; CHECK-NEXT: store ptr [[V:%.*]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float -; CHECK-NEXT: ret float [[TMP2]] +; LE-MEMDEP-LABEL: define float @coerce_mustalias2( +; LE-MEMDEP-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store ptr [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; LE-MEMDEP-NEXT: ret float [[TMP2]] +; +; LE-MEMSSA-LABEL: define float @coerce_mustalias2( +; LE-MEMSSA-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store ptr [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret float [[A]] +; +; BE-MEMDEP-LABEL: define float @coerce_mustalias2( +; BE-MEMDEP-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store ptr [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[V]] to i32 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; BE-MEMDEP-NEXT: ret float [[TMP2]] +; +; BE-MEMSSA-LABEL: define float @coerce_mustalias2( +; BE-MEMSSA-SAME: ptr [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store ptr [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret float [[A]] ; store ptr %V, ptr %P @@ -81,11 +141,31 @@ define float @coerce_mustalias2(ptr %V, ptr %P) { ;; float -> ptr forwarding. 
define ptr @coerce_mustalias3(float %V, ptr %P) { -; CHECK-LABEL: @coerce_mustalias3( -; CHECK-NEXT: store float [[V:%.*]], ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr -; CHECK-NEXT: ret ptr [[TMP2]] +; LE-MEMDEP-LABEL: define ptr @coerce_mustalias3( +; LE-MEMDEP-SAME: float [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store float [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr +; LE-MEMDEP-NEXT: ret ptr [[TMP2]] +; +; LE-MEMSSA-LABEL: define ptr @coerce_mustalias3( +; LE-MEMSSA-SAME: float [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store float [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret ptr [[A]] +; +; BE-MEMDEP-LABEL: define ptr @coerce_mustalias3( +; BE-MEMDEP-SAME: float [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store float [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast float [[V]] to i32 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr +; BE-MEMDEP-NEXT: ret ptr [[TMP2]] +; +; BE-MEMSSA-LABEL: define ptr @coerce_mustalias3( +; BE-MEMSSA-SAME: float [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store float [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret ptr [[A]] ; store float %V, ptr %P @@ -96,14 +176,47 @@ define ptr @coerce_mustalias3(float %V, ptr %P) { ;; i32 -> f32 load forwarding. 
define float @coerce_mustalias4(ptr %P, i1 %cond) { -; CHECK-LABEL: @coerce_mustalias4( -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float -; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: ret float [[TMP1]] -; CHECK: F: -; CHECK-NEXT: ret float [[TMP1]] +; LE-MEMDEP-LABEL: define float @coerce_mustalias4( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: ret float [[TMP1]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: ret float [[TMP1]] +; +; LE-MEMSSA-LABEL: define float @coerce_mustalias4( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[B:%.*]] = load float, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: ret float [[B]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: [[X:%.*]] = bitcast i32 [[A]] to float +; LE-MEMSSA-NEXT: ret float [[X]] +; +; BE-MEMDEP-LABEL: define float @coerce_mustalias4( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = bitcast i32 [[A]] to float +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: ret float [[TMP1]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: ret float [[TMP1]] +; +; BE-MEMSSA-LABEL: define float @coerce_mustalias4( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[B:%.*]] = load float, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; 
BE-MEMSSA-NEXT: ret float [[B]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: [[X:%.*]] = bitcast i32 [[A]] to float +; BE-MEMSSA-NEXT: ret float [[X]] ; %A = load i32, ptr %P @@ -120,16 +233,30 @@ F: ;; i32 -> i8 forwarding define i8 @coerce_mustalias5(i32 %V, ptr %P) { -; LE-LABEL: @coerce_mustalias5( -; LE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8 -; LE-NEXT: ret i8 [[TMP1]] +; LE-MEMDEP-LABEL: define i8 @coerce_mustalias5( +; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[V]] to i8 +; LE-MEMDEP-NEXT: ret i8 [[TMP1]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_mustalias5( +; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_mustalias5( +; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 24 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; BE-MEMDEP-NEXT: ret i8 [[TMP2]] ; -; BE-LABEL: @coerce_mustalias5( -; BE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 24 -; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -; BE-NEXT: ret i8 [[TMP2]] +; BE-MEMSSA-LABEL: define i8 @coerce_mustalias5( +; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; store i32 %V, ptr %P @@ -140,18 +267,32 @@ define i8 @coerce_mustalias5(i32 %V, ptr %P) { ;; i64 -> float forwarding define float @coerce_mustalias6(i64 %V, ptr %P) { -; LE-LABEL: @coerce_mustalias6( -; LE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4 -; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] 
to i32 -; LE-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float -; LE-NEXT: ret float [[TMP2]] -; -; BE-LABEL: @coerce_mustalias6( -; BE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4 -; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32 -; BE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; BE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float -; BE-NEXT: ret float [[TMP3]] +; LE-MEMDEP-LABEL: define float @coerce_mustalias6( +; LE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = bitcast i32 [[TMP1]] to float +; LE-MEMDEP-NEXT: ret float [[TMP2]] +; +; LE-MEMSSA-LABEL: define float @coerce_mustalias6( +; LE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret float [[A]] +; +; BE-MEMDEP-LABEL: define float @coerce_mustalias6( +; BE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; BE-MEMDEP-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to float +; BE-MEMDEP-NEXT: ret float [[TMP3]] +; +; BE-MEMSSA-LABEL: define float @coerce_mustalias6( +; BE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load float, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret float [[A]] ; store i64 %V, ptr %P @@ -162,18 +303,32 @@ define float @coerce_mustalias6(i64 %V, ptr %P) { ;; i64 -> ptr (32-bit) forwarding define ptr @coerce_mustalias7(i64 %V, ptr %P) { -; LE-LABEL: @coerce_mustalias7( -; LE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4 -; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32 -; LE-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr -; LE-NEXT: ret ptr [[TMP2]] -; -; 
BE-LABEL: @coerce_mustalias7( -; BE-NEXT: store i64 [[V:%.*]], ptr [[P:%.*]], align 4 -; BE-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32 -; BE-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 -; BE-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr -; BE-NEXT: ret ptr [[TMP3]] +; LE-MEMDEP-LABEL: define ptr @coerce_mustalias7( +; LE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[V]] to i32 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = inttoptr i32 [[TMP1]] to ptr +; LE-MEMDEP-NEXT: ret ptr [[TMP2]] +; +; LE-MEMSSA-LABEL: define ptr @coerce_mustalias7( +; LE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: ret ptr [[A]] +; +; BE-MEMDEP-LABEL: define ptr @coerce_mustalias7( +; BE-MEMDEP-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i64 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i64 [[V]], 32 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; BE-MEMDEP-NEXT: [[TMP3:%.*]] = inttoptr i32 [[TMP2]] to ptr +; BE-MEMDEP-NEXT: ret ptr [[TMP3]] +; +; BE-MEMSSA-LABEL: define ptr @coerce_mustalias7( +; BE-MEMSSA-SAME: i64 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i64 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: ret ptr [[A]] ; store i64 %V, ptr %P @@ -184,10 +339,33 @@ define ptr @coerce_mustalias7(i64 %V, ptr %P) { ; memset -> i16 forwarding. 
define signext i16 @memset_to_i16_local(ptr %A) nounwind ssp { -; CHECK-LABEL: @memset_to_i16_local( -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A:%.*]], i8 1, i64 200, i1 false) -; CHECK-NEXT: ret i16 257 +; LE-MEMDEP-LABEL: define signext i16 @memset_to_i16_local( +; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false) +; LE-MEMDEP-NEXT: ret i16 257 +; +; LE-MEMSSA-LABEL: define signext i16 @memset_to_i16_local( +; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false) +; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 42 +; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; LE-MEMSSA-NEXT: ret i16 [[TTMP2]] +; +; BE-MEMDEP-LABEL: define signext i16 @memset_to_i16_local( +; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false) +; BE-MEMDEP-NEXT: ret i16 257 +; +; BE-MEMSSA-LABEL: define signext i16 @memset_to_i16_local( +; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 1, i64 200, i1 false) +; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[A]], i64 42 +; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; BE-MEMSSA-NEXT: ret i16 [[TTMP2]] ; entry: tail call void @llvm.memset.p0.i64(ptr %A, i8 1, i64 200, i1 false) @@ -198,16 +376,45 @@ entry: ; memset -> float forwarding. 
define float @memset_to_float_local(ptr %A, i8 %Val) nounwind ssp { -; CHECK-LABEL: @memset_to_float_local( -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A:%.*]], i8 [[VAL:%.*]], i64 400, i1 false) -; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8 -; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 -; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float -; CHECK-NEXT: ret float [[TMP5]] +; LE-MEMDEP-LABEL: define float @memset_to_float_local( +; LE-MEMDEP-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false) +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]] +; LE-MEMDEP-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 +; LE-MEMDEP-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] +; LE-MEMDEP-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; LE-MEMDEP-NEXT: ret float [[TMP5]] +; +; LE-MEMSSA-LABEL: define float @memset_to_float_local( +; LE-MEMSSA-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false) +; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 42 +; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; LE-MEMSSA-NEXT: ret float [[TTMP2]] +; +; BE-MEMDEP-LABEL: define float @memset_to_float_local( +; BE-MEMDEP-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false) +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = zext i8 [[VAL]] to i32 +; 
BE-MEMDEP-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 8 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = or i32 [[TMP0]], [[TMP1]] +; BE-MEMDEP-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 16 +; BE-MEMDEP-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] +; BE-MEMDEP-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to float +; BE-MEMDEP-NEXT: ret float [[TMP5]] +; +; BE-MEMSSA-LABEL: define float @memset_to_float_local( +; BE-MEMSSA-SAME: ptr [[A:%.*]], i8 [[VAL:%.*]]) #[[ATTR0]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[A]], i8 [[VAL]], i64 400, i1 false) +; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 42 +; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; BE-MEMSSA-NEXT: ret float [[TTMP2]] ; entry: tail call void @llvm.memset.p0.i64(ptr %A, i8 %Val, i64 400, i1 false) @@ -218,17 +425,59 @@ entry: ;; non-local memset -> i16 load forwarding. define i16 @memset_to_i16_nonlocal0(ptr %P, i1 %cond) { -; CHECK-LABEL: @memset_to_i16_nonlocal0( -; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P:%.*]], i8 1, i64 400, i1 false) -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: F: -; CHECK-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false) -; CHECK-NEXT: br label [[CONT]] -; CHECK: Cont: -; CHECK-NEXT: [[A:%.*]] = phi i16 [ 514, [[F]] ], [ 257, [[T]] ] -; CHECK-NEXT: ret i16 [[A]] +; LE-MEMDEP-LABEL: define i16 @memset_to_i16_nonlocal0( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false) +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false) +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; 
LE-MEMDEP-NEXT: [[A:%.*]] = phi i16 [ 514, %[[F]] ], [ 257, %[[T]] ] +; LE-MEMDEP-NEXT: ret i16 [[A]] +; +; LE-MEMSSA-LABEL: define i16 @memset_to_i16_nonlocal0( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false) +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false) +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 4 +; LE-MEMSSA-NEXT: [[A:%.*]] = load i16, ptr [[P2]], align 2 +; LE-MEMSSA-NEXT: ret i16 [[A]] +; +; BE-MEMDEP-LABEL: define i16 @memset_to_i16_nonlocal0( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false) +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false) +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i16 [ 514, %[[F]] ], [ 257, %[[T]] ] +; BE-MEMDEP-NEXT: ret i16 [[A]] +; +; BE-MEMSSA-LABEL: define i16 @memset_to_i16_nonlocal0( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 1, i64 400, i1 false) +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: tail call void @llvm.memset.p0.i64(ptr [[P]], i8 2, i64 400, i1 false) +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i16, ptr [[P]], i32 4 +; BE-MEMSSA-NEXT: [[A:%.*]] = load i16, ptr 
[[P2]], align 2 +; BE-MEMSSA-NEXT: ret i16 [[A]] ; br i1 %cond, label %T, label %F T: @@ -251,10 +500,33 @@ Cont: ; memset -> float forwarding. define float @memcpy_to_float_local(ptr %A) nounwind ssp { -; CHECK-LABEL: @memcpy_to_float_local( -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A:%.*]], ptr @GCst, i64 12, i1 false) -; CHECK-NEXT: ret float 1.400000e+01 +; LE-MEMDEP-LABEL: define float @memcpy_to_float_local( +; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false) +; LE-MEMDEP-NEXT: ret float 1.400000e+01 +; +; LE-MEMSSA-LABEL: define float @memcpy_to_float_local( +; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false) +; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 +; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; LE-MEMSSA-NEXT: ret float [[TTMP2]] +; +; BE-MEMDEP-LABEL: define float @memcpy_to_float_local( +; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false) +; BE-MEMDEP-NEXT: ret float 1.400000e+01 +; +; BE-MEMSSA-LABEL: define float @memcpy_to_float_local( +; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[A]], ptr @GCst, i64 12, i1 false) +; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 +; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; BE-MEMSSA-NEXT: ret float [[TTMP2]] ; entry: tail call void @llvm.memcpy.p0.p0.i64(ptr %A, ptr @GCst, i64 12, i1 false) @@ -265,10 +537,33 @@ entry: ; memcpy from address space 1 define float 
@memcpy_to_float_local_as1(ptr %A) nounwind ssp { -; CHECK-LABEL: @memcpy_to_float_local_as1( -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A:%.*]], ptr addrspace(1) @GCst_as1, i64 12, i1 false) -; CHECK-NEXT: ret float 1.400000e+01 +; LE-MEMDEP-LABEL: define float @memcpy_to_float_local_as1( +; LE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false) +; LE-MEMDEP-NEXT: ret float 1.400000e+01 +; +; LE-MEMSSA-LABEL: define float @memcpy_to_float_local_as1( +; LE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false) +; LE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 +; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; LE-MEMSSA-NEXT: ret float [[TTMP2]] +; +; BE-MEMDEP-LABEL: define float @memcpy_to_float_local_as1( +; BE-MEMDEP-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false) +; BE-MEMDEP-NEXT: ret float 1.400000e+01 +; +; BE-MEMSSA-LABEL: define float @memcpy_to_float_local_as1( +; BE-MEMSSA-SAME: ptr [[A:%.*]]) #[[ATTR0]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr [[A]], ptr addrspace(1) @GCst_as1, i64 12, i1 false) +; BE-MEMSSA-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 1 +; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; BE-MEMSSA-NEXT: ret float [[TTMP2]] ; entry: tail call void @llvm.memcpy.p0.p1.i64(ptr %A, ptr addrspace(1) @GCst_as1, i64 12, i1 false) @@ -279,29 +574,57 @@ entry: ;; non-local i32/float -> i8 load forwarding. 
define i8 @coerce_mustalias_nonlocal0(ptr %P, i1 %cond) { -; LE-LABEL: @coerce_mustalias_nonlocal0( -; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; LE: T: -; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; LE-NEXT: br label [[CONT:%.*]] -; LE: F: -; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; LE-NEXT: br label [[CONT]] -; LE: Cont: -; LE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] -; LE-NEXT: ret i8 [[A]] -; -; BE-LABEL: @coerce_mustalias_nonlocal0( -; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; BE: T: -; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; BE-NEXT: br label [[CONT:%.*]] -; BE: F: -; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; BE-NEXT: br label [[CONT]] -; BE: Cont: -; BE-NEXT: [[A:%.*]] = phi i8 [ 63, [[F]] ], [ 0, [[T]] ] -; BE-NEXT: ret i8 [[A]] +; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal0( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ 42, %[[T]] ] +; LE-MEMDEP-NEXT: ret i8 [[A]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal0( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal0( +; 
BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 63, %[[F]] ], [ 0, %[[T]] ] +; BE-MEMDEP-NEXT: ret i8 [[A]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal0( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; br i1 %cond, label %T, label %F T: @@ -322,29 +645,57 @@ Cont: ;; non-local i32/float -> i8 load forwarding. This also tests that the "P3" ;; bitcast equivalence can be properly phi translated. 
define i8 @coerce_mustalias_nonlocal1(ptr %P, i1 %cond) { -; LE-LABEL: @coerce_mustalias_nonlocal1( -; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; LE: T: -; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; LE-NEXT: br label [[CONT:%.*]] -; LE: F: -; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; LE-NEXT: br label [[CONT]] -; LE: Cont: -; LE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ 42, [[T]] ] -; LE-NEXT: ret i8 [[A]] -; -; BE-LABEL: @coerce_mustalias_nonlocal1( -; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; BE: T: -; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; BE-NEXT: br label [[CONT:%.*]] -; BE: F: -; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; BE-NEXT: br label [[CONT]] -; BE: Cont: -; BE-NEXT: [[A:%.*]] = phi i8 [ 63, [[F]] ], [ 0, [[T]] ] -; BE-NEXT: ret i8 [[A]] +; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal1( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ 42, %[[T]] ] +; LE-MEMDEP-NEXT: ret i8 [[A]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal1( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_nonlocal1( +; 
BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 63, %[[F]] ], [ 0, %[[T]] ] +; BE-MEMDEP-NEXT: ret i8 [[A]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_nonlocal1( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; br i1 %cond, label %T, label %F T: @@ -364,29 +715,55 @@ Cont: ;; non-local i32 -> i8 partial redundancy load forwarding. 
define i8 @coerce_mustalias_pre0(ptr %P, i1 %cond) { -; LE-LABEL: @coerce_mustalias_pre0( -; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; LE: T: -; LE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; LE-NEXT: br label [[CONT:%.*]] -; LE: F: -; LE-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1 -; LE-NEXT: br label [[CONT]] -; LE: Cont: -; LE-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 42, [[T]] ] -; LE-NEXT: ret i8 [[A]] -; -; BE-LABEL: @coerce_mustalias_pre0( -; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; BE: T: -; BE-NEXT: store i32 42, ptr [[P:%.*]], align 4 -; BE-NEXT: br label [[CONT:%.*]] -; BE: F: -; BE-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1 -; BE-NEXT: br label [[CONT]] -; BE: Cont: -; BE-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 0, [[T]] ] -; BE-NEXT: ret i8 [[A]] +; LE-MEMDEP-LABEL: define i8 @coerce_mustalias_pre0( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1 +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 42, %[[T]] ] +; LE-MEMDEP-NEXT: ret i8 [[A]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_mustalias_pre0( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_mustalias_pre0( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: br i1 
[[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P]], align 1 +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ] +; BE-MEMDEP-NEXT: ret i8 [[A]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_mustalias_pre0( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; br i1 %cond, label %T, label %F T: @@ -410,17 +787,33 @@ Cont: ;; i32 -> i8 forwarding. ;; PR4216 define i8 @coerce_offset0(i32 %V, ptr %P) { -; LE-LABEL: @coerce_offset0( -; LE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 16 -; LE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -; LE-NEXT: ret i8 [[TMP2]] -; -; BE-LABEL: @coerce_offset0( -; BE-NEXT: store i32 [[V:%.*]], ptr [[P:%.*]], align 4 -; BE-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 8 -; BE-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 -; BE-NEXT: ret i8 [[TMP2]] +; LE-MEMDEP-LABEL: define i8 @coerce_offset0( +; LE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 16 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; LE-MEMDEP-NEXT: ret i8 [[TMP2]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_offset0( +; LE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMSSA-NEXT: [[A:%.*]] = 
load i8, ptr [[P3]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_offset0( +; BE-MEMDEP-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 8 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 +; BE-MEMDEP-NEXT: ret i8 [[TMP2]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_offset0( +; BE-MEMSSA-SAME: i32 [[V:%.*]], ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: store i32 [[V]], ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P3]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; store i32 %V, ptr %P @@ -432,29 +825,59 @@ define i8 @coerce_offset0(i32 %V, ptr %P) { ;; non-local i32/float -> i8 load forwarding. define i8 @coerce_offset_nonlocal0(ptr %P, i1 %cond) { -; LE-LABEL: @coerce_offset_nonlocal0( -; LE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; LE: T: -; LE-NEXT: store i32 57005, ptr [[P:%.*]], align 4 -; LE-NEXT: br label [[CONT:%.*]] -; LE: F: -; LE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; LE-NEXT: br label [[CONT]] -; LE: Cont: -; LE-NEXT: [[A:%.*]] = phi i8 [ -128, [[F]] ], [ 0, [[T]] ] -; LE-NEXT: ret i8 [[A]] -; -; BE-LABEL: @coerce_offset_nonlocal0( -; BE-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; BE: T: -; BE-NEXT: store i32 57005, ptr [[P:%.*]], align 4 -; BE-NEXT: br label [[CONT:%.*]] -; BE: F: -; BE-NEXT: store float 1.000000e+00, ptr [[P]], align 4 -; BE-NEXT: br label [[CONT]] -; BE: Cont: -; BE-NEXT: [[A:%.*]] = phi i8 [ 0, [[F]] ], [ -34, [[T]] ] -; BE-NEXT: ret i8 [[A]] +; LE-MEMDEP-LABEL: define i8 @coerce_offset_nonlocal0( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: store i32 57005, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: 
store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ -128, %[[F]] ], [ 0, %[[T]] ] +; LE-MEMDEP-NEXT: ret i8 [[A]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_offset_nonlocal0( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: store i32 57005, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_offset_nonlocal0( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: store i32 57005, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ 0, %[[F]] ], [ -34, %[[T]] ] +; BE-MEMDEP-NEXT: ret i8 [[A]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_offset_nonlocal0( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: store i32 57005, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: store float 1.000000e+00, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; %P4 = getelementptr i8, ptr %P, i32 2 
br i1 %cond, label %T, label %F @@ -475,18 +898,59 @@ Cont: ;; non-local i32 -> i8 partial redundancy load forwarding. define i8 @coerce_offset_pre0(ptr %P, i1 %cond) { -; CHECK-LABEL: @coerce_offset_pre0( -; CHECK-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 -; CHECK-NEXT: br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]] -; CHECK: T: -; CHECK-NEXT: store i32 42, ptr [[P]], align 4 -; CHECK-NEXT: br label [[CONT:%.*]] -; CHECK: F: -; CHECK-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1 -; CHECK-NEXT: br label [[CONT]] -; CHECK: Cont: -; CHECK-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], [[F]] ], [ 0, [[T]] ] -; CHECK-NEXT: ret i8 [[A]] +; LE-MEMDEP-LABEL: define i8 @coerce_offset_pre0( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMDEP: [[T]]: +; LE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[CONT:.*]] +; LE-MEMDEP: [[F]]: +; LE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1 +; LE-MEMDEP-NEXT: br label %[[CONT]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ] +; LE-MEMDEP-NEXT: ret i8 [[A]] +; +; LE-MEMSSA-LABEL: define i8 @coerce_offset_pre0( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; LE-MEMSSA: [[T]]: +; LE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[CONT:.*]] +; LE-MEMSSA: [[F]]: +; LE-MEMSSA-NEXT: br label %[[CONT]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1 +; LE-MEMSSA-NEXT: ret i8 [[A]] +; +; BE-MEMDEP-LABEL: define i8 @coerce_offset_pre0( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMDEP-NEXT: br i1 
[[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMDEP: [[T]]: +; BE-MEMDEP-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[CONT:.*]] +; BE-MEMDEP: [[F]]: +; BE-MEMDEP-NEXT: [[A_PRE:%.*]] = load i8, ptr [[P4]], align 1 +; BE-MEMDEP-NEXT: br label %[[CONT]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[A:%.*]] = phi i8 [ [[A_PRE]], %[[F]] ], [ 0, %[[T]] ] +; BE-MEMDEP-NEXT: ret i8 [[A]] +; +; BE-MEMSSA-LABEL: define i8 @coerce_offset_pre0( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[P4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[T:.*]], label %[[F:.*]] +; BE-MEMSSA: [[T]]: +; BE-MEMSSA-NEXT: store i32 42, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[CONT:.*]] +; BE-MEMSSA: [[F]]: +; BE-MEMSSA-NEXT: br label %[[CONT]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = load i8, ptr [[P4]], align 1 +; BE-MEMSSA-NEXT: ret i8 [[A]] ; %P4 = getelementptr i8, ptr %P, i32 2 br i1 %cond, label %T, label %F @@ -504,20 +968,71 @@ Cont: } define i32 @chained_load(ptr %p, i32 %x, i32 %y) { -; CHECK-LABEL: @chained_load( -; CHECK-NEXT: block1: -; CHECK-NEXT: [[A:%.*]] = alloca ptr, align 4 -; CHECK-NEXT: [[Z:%.*]] = load ptr, ptr [[P:%.*]], align 4 -; CHECK-NEXT: store ptr [[Z]], ptr [[A]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]] -; CHECK: block2: -; CHECK-NEXT: br label [[BLOCK4:%.*]] -; CHECK: block3: -; CHECK-NEXT: br label [[BLOCK4]] -; CHECK: block4: -; CHECK-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4 -; CHECK-NEXT: ret i32 [[D]] +; LE-MEMDEP-LABEL: define i32 @chained_load( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) { +; LE-MEMDEP-NEXT: [[BLOCK1:.*:]] +; LE-MEMDEP-NEXT: [[A:%.*]] = alloca ptr, align 4 +; LE-MEMDEP-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: store ptr [[Z]], ptr [[A]], align 4 +; LE-MEMDEP-NEXT: [[CMP:%.*]] = 
icmp eq i32 [[X]], [[Y]] +; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; LE-MEMDEP: [[BLOCK2]]: +; LE-MEMDEP-NEXT: br label %[[BLOCK4:.*]] +; LE-MEMDEP: [[BLOCK3]]: +; LE-MEMDEP-NEXT: br label %[[BLOCK4]] +; LE-MEMDEP: [[BLOCK4]]: +; LE-MEMDEP-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4 +; LE-MEMDEP-NEXT: ret i32 [[D]] +; +; LE-MEMSSA-LABEL: define i32 @chained_load( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) { +; LE-MEMSSA-NEXT: [[BLOCK1:.*:]] +; LE-MEMSSA-NEXT: [[A:%.*]] = alloca ptr, align 4 +; LE-MEMSSA-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: store ptr [[Z]], ptr [[A]], align 4 +; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]] +; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; LE-MEMSSA: [[BLOCK2]]: +; LE-MEMSSA-NEXT: br label %[[BLOCK4:.*]] +; LE-MEMSSA: [[BLOCK3]]: +; LE-MEMSSA-NEXT: br label %[[BLOCK4]] +; LE-MEMSSA: [[BLOCK4]]: +; LE-MEMSSA-NEXT: [[C:%.*]] = load ptr, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4 +; LE-MEMSSA-NEXT: ret i32 [[D]] +; +; BE-MEMDEP-LABEL: define i32 @chained_load( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) { +; BE-MEMDEP-NEXT: [[BLOCK1:.*:]] +; BE-MEMDEP-NEXT: [[A:%.*]] = alloca ptr, align 4 +; BE-MEMDEP-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: store ptr [[Z]], ptr [[A]], align 4 +; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]] +; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; BE-MEMDEP: [[BLOCK2]]: +; BE-MEMDEP-NEXT: br label %[[BLOCK4:.*]] +; BE-MEMDEP: [[BLOCK3]]: +; BE-MEMDEP-NEXT: br label %[[BLOCK4]] +; BE-MEMDEP: [[BLOCK4]]: +; BE-MEMDEP-NEXT: [[D:%.*]] = load i32, ptr [[Z]], align 4 +; BE-MEMDEP-NEXT: ret i32 [[D]] +; +; BE-MEMSSA-LABEL: define i32 @chained_load( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]]) { +; BE-MEMSSA-NEXT: [[BLOCK1:.*:]] +; BE-MEMSSA-NEXT: 
[[A:%.*]] = alloca ptr, align 4 +; BE-MEMSSA-NEXT: [[Z:%.*]] = load ptr, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: store ptr [[Z]], ptr [[A]], align 4 +; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]] +; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; BE-MEMSSA: [[BLOCK2]]: +; BE-MEMSSA-NEXT: br label %[[BLOCK4:.*]] +; BE-MEMSSA: [[BLOCK3]]: +; BE-MEMSSA-NEXT: br label %[[BLOCK4]] +; BE-MEMSSA: [[BLOCK4]]: +; BE-MEMSSA-NEXT: [[C:%.*]] = load ptr, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4 +; BE-MEMSSA-NEXT: ret i32 [[D]] ; block1: %A = alloca ptr @@ -547,27 +1062,27 @@ declare i1 @cond() readonly declare i1 @cond2() readonly define i32 @phi_trans2() { -; CHECK-LABEL: @phi_trans2( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @phi_trans2() { +; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[P:%.*]] = alloca i32, i32 400, align 4 -; CHECK-NEXT: br label [[F1:%.*]] -; CHECK: F1: -; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 2, [[F:%.*]] ] +; CHECK-NEXT: br label %[[F1:.*]] +; CHECK: [[F1]]: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ 2, %[[F:.*]] ] ; CHECK-NEXT: [[COND2:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[COND2]], label [[T1:%.*]], label [[TY:%.*]] -; CHECK: T1: +; CHECK-NEXT: br i1 [[COND2]], label %[[T1:.*]], label %[[TY:.*]] +; CHECK: [[T1]]: ; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 [[A]] ; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P2]], align 4 ; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2() -; CHECK-NEXT: br i1 [[COND]], label [[TX:%.*]], label [[F]] -; CHECK: F: +; CHECK-NEXT: br i1 [[COND]], label %[[TX:.*]], label %[[F]] +; CHECK: [[F]]: ; CHECK-NEXT: [[P3:%.*]] = getelementptr i32, ptr [[P]], i32 2 ; CHECK-NEXT: store i32 17, ptr [[P3]], align 4 ; CHECK-NEXT: store i32 42, ptr [[P2]], align 4 -; CHECK-NEXT: br label [[F1]] -; CHECK: TX: +; CHECK-NEXT: br label %[[F1]] +; CHECK: [[TX]]: ; CHECK-NEXT: ret i32 [[X]] -; CHECK: TY: +; CHECK: 
[[TY]]: ; CHECK-NEXT: ret i32 0 ; entry: @@ -605,32 +1120,123 @@ TY: } define i32 @phi_trans3(ptr %p, i32 %x, i32 %y, i32 %z) { -; CHECK-LABEL: @phi_trans3( -; CHECK-NEXT: block1: -; CHECK-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[CMPXY]], label [[BLOCK2:%.*]], label [[BLOCK3:%.*]] -; CHECK: block2: -; CHECK-NEXT: store i32 87, ptr [[P:%.*]], align 4 -; CHECK-NEXT: br label [[BLOCK4:%.*]] -; CHECK: block3: -; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43 -; CHECK-NEXT: store i32 97, ptr [[P2]], align 4 -; CHECK-NEXT: br label [[BLOCK4]] -; CHECK: block4: -; CHECK-NEXT: [[D:%.*]] = phi i32 [ 87, [[BLOCK2]] ], [ 97, [[BLOCK3]] ] -; CHECK-NEXT: br i1 [[CMPXY]], label [[BLOCK5:%.*]], label [[EXIT:%.*]] -; CHECK: block5: -; CHECK-NEXT: br i1 true, label [[BLOCK6:%.*]], label [[BLOCK5_EXIT_CRIT_EDGE:%.*]] -; CHECK: block5.exit_crit_edge: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: block6: -; CHECK-NEXT: br i1 true, label [[BLOCK7:%.*]], label [[BLOCK6_EXIT_CRIT_EDGE:%.*]] -; CHECK: block6.exit_crit_edge: -; CHECK-NEXT: br label [[EXIT]] -; CHECK: block7: -; CHECK-NEXT: ret i32 [[D]] -; CHECK: exit: -; CHECK-NEXT: ret i32 -1 +; LE-MEMDEP-LABEL: define i32 @phi_trans3( +; LE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; LE-MEMDEP-NEXT: [[BLOCK1:.*:]] +; LE-MEMDEP-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]] +; LE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; LE-MEMDEP: [[BLOCK2]]: +; LE-MEMDEP-NEXT: store i32 87, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: br label %[[BLOCK4:.*]] +; LE-MEMDEP: [[BLOCK3]]: +; LE-MEMDEP-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43 +; LE-MEMDEP-NEXT: store i32 97, ptr [[P2]], align 4 +; LE-MEMDEP-NEXT: br label %[[BLOCK4]] +; LE-MEMDEP: [[BLOCK4]]: +; LE-MEMDEP-NEXT: [[D:%.*]] = phi i32 [ 87, %[[BLOCK2]] ], [ 97, %[[BLOCK3]] ] +; LE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]] +; LE-MEMDEP: 
[[BLOCK5]]: +; LE-MEMDEP-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]] +; LE-MEMDEP: [[BLOCK5_EXIT_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: br label %[[EXIT]] +; LE-MEMDEP: [[BLOCK6]]: +; LE-MEMDEP-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]] +; LE-MEMDEP: [[BLOCK6_EXIT_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: br label %[[EXIT]] +; LE-MEMDEP: [[BLOCK7]]: +; LE-MEMDEP-NEXT: ret i32 [[D]] +; LE-MEMDEP: [[EXIT]]: +; LE-MEMDEP-NEXT: ret i32 -1 +; +; LE-MEMSSA-LABEL: define i32 @phi_trans3( +; LE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; LE-MEMSSA-NEXT: [[BLOCK1:.*:]] +; LE-MEMSSA-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]] +; LE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; LE-MEMSSA: [[BLOCK2]]: +; LE-MEMSSA-NEXT: store i32 87, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: br label %[[BLOCK4:.*]] +; LE-MEMSSA: [[BLOCK3]]: +; LE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43 +; LE-MEMSSA-NEXT: store i32 97, ptr [[P2]], align 4 +; LE-MEMSSA-NEXT: br label %[[BLOCK4]] +; LE-MEMSSA: [[BLOCK4]]: +; LE-MEMSSA-NEXT: [[A:%.*]] = phi i32 [ -1, %[[BLOCK2]] ], [ 42, %[[BLOCK3]] ] +; LE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]] +; LE-MEMSSA: [[BLOCK5]]: +; LE-MEMSSA-NEXT: [[B:%.*]] = add i32 [[A]], 1 +; LE-MEMSSA-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]] +; LE-MEMSSA: [[BLOCK5_EXIT_CRIT_EDGE]]: +; LE-MEMSSA-NEXT: br label %[[EXIT]] +; LE-MEMSSA: [[BLOCK6]]: +; LE-MEMSSA-NEXT: [[C:%.*]] = getelementptr i32, ptr [[P]], i32 [[B]] +; LE-MEMSSA-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]] +; LE-MEMSSA: [[BLOCK6_EXIT_CRIT_EDGE]]: +; LE-MEMSSA-NEXT: br label %[[EXIT]] +; LE-MEMSSA: [[BLOCK7]]: +; LE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4 +; LE-MEMSSA-NEXT: ret i32 [[D]] +; LE-MEMSSA: [[EXIT]]: +; LE-MEMSSA-NEXT: ret i32 -1 +; +; BE-MEMDEP-LABEL: define i32 
@phi_trans3( +; BE-MEMDEP-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; BE-MEMDEP-NEXT: [[BLOCK1:.*:]] +; BE-MEMDEP-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]] +; BE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; BE-MEMDEP: [[BLOCK2]]: +; BE-MEMDEP-NEXT: store i32 87, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: br label %[[BLOCK4:.*]] +; BE-MEMDEP: [[BLOCK3]]: +; BE-MEMDEP-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43 +; BE-MEMDEP-NEXT: store i32 97, ptr [[P2]], align 4 +; BE-MEMDEP-NEXT: br label %[[BLOCK4]] +; BE-MEMDEP: [[BLOCK4]]: +; BE-MEMDEP-NEXT: [[D:%.*]] = phi i32 [ 87, %[[BLOCK2]] ], [ 97, %[[BLOCK3]] ] +; BE-MEMDEP-NEXT: br i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]] +; BE-MEMDEP: [[BLOCK5]]: +; BE-MEMDEP-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]] +; BE-MEMDEP: [[BLOCK5_EXIT_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: br label %[[EXIT]] +; BE-MEMDEP: [[BLOCK6]]: +; BE-MEMDEP-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]] +; BE-MEMDEP: [[BLOCK6_EXIT_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: br label %[[EXIT]] +; BE-MEMDEP: [[BLOCK7]]: +; BE-MEMDEP-NEXT: ret i32 [[D]] +; BE-MEMDEP: [[EXIT]]: +; BE-MEMDEP-NEXT: ret i32 -1 +; +; BE-MEMSSA-LABEL: define i32 @phi_trans3( +; BE-MEMSSA-SAME: ptr [[P:%.*]], i32 [[X:%.*]], i32 [[Y:%.*]], i32 [[Z:%.*]]) { +; BE-MEMSSA-NEXT: [[BLOCK1:.*:]] +; BE-MEMSSA-NEXT: [[CMPXY:%.*]] = icmp eq i32 [[X]], [[Y]] +; BE-MEMSSA-NEXT: br i1 [[CMPXY]], label %[[BLOCK2:.*]], label %[[BLOCK3:.*]] +; BE-MEMSSA: [[BLOCK2]]: +; BE-MEMSSA-NEXT: store i32 87, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: br label %[[BLOCK4:.*]] +; BE-MEMSSA: [[BLOCK3]]: +; BE-MEMSSA-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P]], i32 43 +; BE-MEMSSA-NEXT: store i32 97, ptr [[P2]], align 4 +; BE-MEMSSA-NEXT: br label %[[BLOCK4]] +; BE-MEMSSA: [[BLOCK4]]: +; BE-MEMSSA-NEXT: [[A:%.*]] = phi i32 [ -1, %[[BLOCK2]] ], [ 42, %[[BLOCK3]] ] +; BE-MEMSSA-NEXT: br 
i1 [[CMPXY]], label %[[BLOCK5:.*]], label %[[EXIT:.*]] +; BE-MEMSSA: [[BLOCK5]]: +; BE-MEMSSA-NEXT: [[B:%.*]] = add i32 [[A]], 1 +; BE-MEMSSA-NEXT: br i1 true, label %[[BLOCK6:.*]], label %[[BLOCK5_EXIT_CRIT_EDGE:.*]] +; BE-MEMSSA: [[BLOCK5_EXIT_CRIT_EDGE]]: +; BE-MEMSSA-NEXT: br label %[[EXIT]] +; BE-MEMSSA: [[BLOCK6]]: +; BE-MEMSSA-NEXT: [[C:%.*]] = getelementptr i32, ptr [[P]], i32 [[B]] +; BE-MEMSSA-NEXT: br i1 true, label %[[BLOCK7:.*]], label %[[BLOCK6_EXIT_CRIT_EDGE:.*]] +; BE-MEMSSA: [[BLOCK6_EXIT_CRIT_EDGE]]: +; BE-MEMSSA-NEXT: br label %[[EXIT]] +; BE-MEMSSA: [[BLOCK7]]: +; BE-MEMSSA-NEXT: [[D:%.*]] = load i32, ptr [[C]], align 4 +; BE-MEMSSA-NEXT: ret i32 [[D]] +; BE-MEMSSA: [[EXIT]]: +; BE-MEMSSA-NEXT: ret i32 -1 ; block1: %cmpxy = icmp eq i32 %x, %y @@ -668,21 +1274,77 @@ exit: } define i8 @phi_trans4(ptr %p) { -; CHECK-LABEL: @phi_trans4( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 192 -; CHECK-NEXT: store i8 -64, ptr [[X3]], align 1 -; CHECK-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; CHECK-NEXT: [[Y2_PRE:%.*]] = load i8, ptr [[X]], align 1 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[Y2:%.*]] = phi i8 [ [[Y2_PRE]], [[ENTRY:%.*]] ], [ 0, [[LOOP]] ] -; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2() -; CHECK-NEXT: store i32 0, ptr [[X3]], align 4 -; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[OUT:%.*]] -; CHECK: out: -; CHECK-NEXT: [[R:%.*]] = add i8 [[Y2_PRE]], [[Y2]] -; CHECK-NEXT: ret i8 [[R]] +; LE-MEMDEP-LABEL: define i8 @phi_trans4( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192 +; LE-MEMDEP-NEXT: store i8 -64, ptr [[X3]], align 1 +; LE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; LE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; LE-MEMDEP: [[LOOP]]: +; LE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], 
%[[ENTRY]] ], [ 0, %[[LOOP]] ] +; LE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2() +; LE-MEMDEP-NEXT: store i32 0, ptr [[X3]], align 4 +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]] +; LE-MEMDEP: [[OUT]]: +; LE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; LE-MEMDEP-NEXT: ret i8 [[R]] +; +; LE-MEMSSA-LABEL: define i8 @phi_trans4( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192 +; LE-MEMSSA-NEXT: store i8 -64, ptr [[X3]], align 1 +; LE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; LE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; LE-MEMSSA: [[LOOP]]: +; LE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 192, %[[LOOP]] ] +; LE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; LE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1 +; LE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2() +; LE-MEMSSA-NEXT: store i32 0, ptr [[X3]], align 4 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]] +; LE-MEMSSA: [[OUT]]: +; LE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; LE-MEMSSA-NEXT: ret i8 [[R]] +; +; BE-MEMDEP-LABEL: define i8 @phi_trans4( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192 +; BE-MEMDEP-NEXT: store i8 -64, ptr [[X3]], align 1 +; BE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; BE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; BE-MEMDEP: [[LOOP]]: +; BE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ 0, %[[LOOP]] ] +; BE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2() +; BE-MEMDEP-NEXT: store i32 0, ptr [[X3]], align 4 +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]] +; BE-MEMDEP: [[OUT]]: +; BE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; BE-MEMDEP-NEXT: 
ret i8 [[R]] +; +; BE-MEMSSA-LABEL: define i8 @phi_trans4( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: [[X3:%.*]] = getelementptr i8, ptr [[P]], i32 192 +; BE-MEMSSA-NEXT: store i8 -64, ptr [[X3]], align 1 +; BE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; BE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; BE-MEMSSA: [[LOOP]]: +; BE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 192, %[[LOOP]] ] +; BE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; BE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1 +; BE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2() +; BE-MEMSSA-NEXT: store i32 0, ptr [[X3]], align 4 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[LOOP]], label %[[OUT:.*]] +; BE-MEMSSA: [[OUT]]: +; BE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; BE-MEMSSA-NEXT: ret i8 [[R]] ; entry: %X3 = getelementptr i8, ptr %p, i32 192 @@ -709,28 +1371,97 @@ out: } define i8 @phi_trans5(ptr %p) { -; CHECK-LABEL: @phi_trans5( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 2 -; CHECK-NEXT: store i8 19, ptr [[X4]], align 1 -; CHECK-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 -; CHECK-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], [[ENTRY:%.*]] ], [ [[Y2_PRE:%.*]], [[CONT:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ 3, [[CONT]] ] -; CHECK-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] -; CHECK-NEXT: [[COND:%.*]] = call i1 @cond2() -; CHECK-NEXT: br i1 [[COND]], label [[CONT]], label [[OUT:%.*]] -; CHECK: cont: -; CHECK-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1 -; CHECK-NEXT: store i32 50462976, ptr [[Z]], align 4 -; CHECK-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; CHECK-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], 
align 1 -; CHECK-NEXT: br label [[LOOP]] -; CHECK: out: -; CHECK-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] -; CHECK-NEXT: ret i8 [[R]] +; LE-MEMDEP-LABEL: define i8 @phi_trans5( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMDEP-NEXT: store i8 19, ptr [[X4]], align 1 +; LE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; LE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; LE-MEMDEP: [[LOOP]]: +; LE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ [[Y2_PRE:%.*]], %[[CONT:.*]] ] +; LE-MEMDEP-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT]] ] +; LE-MEMDEP-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; LE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2() +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]] +; LE-MEMDEP: [[CONT]]: +; LE-MEMDEP-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1 +; LE-MEMDEP-NEXT: store i32 50462976, ptr [[Z]], align 4 +; LE-MEMDEP-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; LE-MEMDEP-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], align 1 +; LE-MEMDEP-NEXT: br label %[[LOOP]] +; LE-MEMDEP: [[OUT]]: +; LE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; LE-MEMDEP-NEXT: ret i8 [[R]] +; +; LE-MEMSSA-LABEL: define i8 @phi_trans5( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; LE-MEMSSA-NEXT: store i8 19, ptr [[X4]], align 1 +; LE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; LE-MEMSSA-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; LE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; LE-MEMSSA: [[LOOP]]: +; LE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT:.*]] ] +; LE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; LE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], 
align 1 +; LE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2() +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]] +; LE-MEMSSA: [[CONT]]: +; LE-MEMSSA-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1 +; LE-MEMSSA-NEXT: store i32 50462976, ptr [[Z]], align 4 +; LE-MEMSSA-NEXT: br label %[[LOOP]] +; LE-MEMSSA: [[OUT]]: +; LE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; LE-MEMSSA-NEXT: ret i8 [[R]] +; +; BE-MEMDEP-LABEL: define i8 @phi_trans5( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMDEP-NEXT: store i8 19, ptr [[X4]], align 1 +; BE-MEMDEP-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-MEMDEP-NEXT: [[Y:%.*]] = load i8, ptr [[X]], align 1 +; BE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; BE-MEMDEP: [[LOOP]]: +; BE-MEMDEP-NEXT: [[Y2:%.*]] = phi i8 [ [[Y]], %[[ENTRY]] ], [ [[Y2_PRE:%.*]], %[[CONT:.*]] ] +; BE-MEMDEP-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT]] ] +; BE-MEMDEP-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; BE-MEMDEP-NEXT: [[COND:%.*]] = call i1 @cond2() +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]] +; BE-MEMDEP: [[CONT]]: +; BE-MEMDEP-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1 +; BE-MEMDEP-NEXT: store i32 50462976, ptr [[Z]], align 4 +; BE-MEMDEP-NEXT: [[X2_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i32 3 +; BE-MEMDEP-NEXT: [[Y2_PRE]] = load i8, ptr [[X2_PHI_TRANS_INSERT]], align 1 +; BE-MEMDEP-NEXT: br label %[[LOOP]] +; BE-MEMDEP: [[OUT]]: +; BE-MEMDEP-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; BE-MEMDEP-NEXT: ret i8 [[R]] +; +; BE-MEMSSA-LABEL: define i8 @phi_trans5( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: [[X4:%.*]] = getelementptr i8, ptr [[P]], i32 2 +; BE-MEMSSA-NEXT: store i8 19, ptr [[X4]], align 1 +; BE-MEMSSA-NEXT: [[X:%.*]] = getelementptr i8, ptr [[P]], i32 4 +; BE-MEMSSA-NEXT: [[Y:%.*]] = 
load i8, ptr [[X]], align 1 +; BE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; BE-MEMSSA: [[LOOP]]: +; BE-MEMSSA-NEXT: [[I:%.*]] = phi i32 [ 4, %[[ENTRY]] ], [ 3, %[[CONT:.*]] ] +; BE-MEMSSA-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[P]], i32 [[I]] +; BE-MEMSSA-NEXT: [[Y2:%.*]] = load i8, ptr [[X2]], align 1 +; BE-MEMSSA-NEXT: [[COND:%.*]] = call i1 @cond2() +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[CONT]], label %[[OUT:.*]] +; BE-MEMSSA: [[CONT]]: +; BE-MEMSSA-NEXT: [[Z:%.*]] = getelementptr i8, ptr [[X2]], i32 -1 +; BE-MEMSSA-NEXT: store i32 50462976, ptr [[Z]], align 4 +; BE-MEMSSA-NEXT: br label %[[LOOP]] +; BE-MEMSSA: [[OUT]]: +; BE-MEMSSA-NEXT: [[R:%.*]] = add i8 [[Y]], [[Y2]] +; BE-MEMSSA-NEXT: ret i8 [[R]] ; entry: @@ -766,24 +1497,79 @@ declare void @use_i32(i32) readonly ; into header. Make sure we translate the address for %l1 correctly where ; parts of the address computations are in different basic blocks. define i32 @phi_trans6(ptr noalias nocapture readonly %x, i1 %cond) { -; CHECK-LABEL: @phi_trans6( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4 -; CHECK-NEXT: call void @use_i32(i32 [[L0]]) -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[L1_PRE:%.*]] = phi i32 [ [[L0]], [[ENTRY:%.*]] ], [ [[L1_PRE1:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ] -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE]] ] -; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans6, [[LATCH:%.*]]), [label %latch] -; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]] -; CHECK: latch.header_crit_edge: -; CHECK-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]] -; CHECK-NEXT: [[L1_PRE1]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4 -; CHECK-NEXT: br label [[HEADER]] -; CHECK: exit: -; CHECK-NEXT: ret i32 [[L1_PRE]] 
+; LE-MEMDEP-LABEL: define i32 @phi_trans6( +; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; LE-MEMDEP: [[HEADER]]: +; LE-MEMDEP-NEXT: [[L1:%.*]] = phi i32 [ [[L0]], %[[ENTRY]] ], [ [[L1_PRE:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE]] ] +; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH:.*]]), [label %latch] +; LE-MEMDEP: [[LATCH]]: +; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]] +; LE-MEMDEP-NEXT: [[L1_PRE]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4 +; LE-MEMDEP-NEXT: br label %[[HEADER]] +; LE-MEMDEP: [[EXIT]]: +; LE-MEMDEP-NEXT: ret i32 [[L1]] +; +; LE-MEMSSA-LABEL: define i32 @phi_trans6( +; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; LE-MEMSSA: [[HEADER]]: +; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH]]), [label %latch] +; LE-MEMSSA: [[LATCH]]: +; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV]] +; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; 
LE-MEMSSA: [[EXIT]]: +; LE-MEMSSA-NEXT: ret i32 [[L1]] +; +; BE-MEMDEP-LABEL: define i32 @phi_trans6( +; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; BE-MEMDEP: [[HEADER]]: +; BE-MEMDEP-NEXT: [[L1:%.*]] = phi i32 [ [[L0]], %[[ENTRY]] ], [ [[L1_PRE:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE]] ] +; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH:.*]]), [label %latch] +; BE-MEMDEP: [[LATCH]]: +; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV_NEXT]] +; BE-MEMDEP-NEXT: [[L1_PRE]] = load i32, ptr [[GEP_1_PHI_TRANS_INSERT_PHI_TRANS_INSERT]], align 4 +; BE-MEMDEP-NEXT: br label %[[HEADER]] +; BE-MEMDEP: [[EXIT]]: +; BE-MEMDEP-NEXT: ret i32 [[L1]] +; +; BE-MEMSSA-LABEL: define i32 @phi_trans6( +; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; BE-MEMSSA: [[HEADER]]: +; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans6, %[[LATCH]]), [label %latch] +; BE-MEMSSA: [[LATCH]]: +; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[IV]] +; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BE-MEMSSA-NEXT: br 
i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; BE-MEMSSA: [[EXIT]]: +; BE-MEMSSA-NEXT: ret i32 [[L1]] ; entry: %l0 = load i32, ptr %x @@ -806,24 +1592,81 @@ exit: ; FIXME: Currently we fail to translate the PHI in this case. define i32 @phi_trans7(ptr noalias nocapture readonly %x, i1 %cond) { -; CHECK-LABEL: @phi_trans7( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4 -; CHECK-NEXT: call void @use_i32(i32 [[L0]]) -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ] -; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 -; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans7, [[LATCH:%.*]]), [label %latch] -; CHECK: latch: -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]] -; CHECK: latch.header_crit_edge: -; CHECK-NEXT: br label [[HEADER]] -; CHECK: exit: -; CHECK-NEXT: ret i32 [[L1]] +; LE-MEMDEP-LABEL: define i32 @phi_trans7( +; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; LE-MEMDEP: [[HEADER]]: +; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; LE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH:.*]]), [label %latch] +; LE-MEMDEP: [[LATCH]]: +; LE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; LE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; 
LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: br label %[[HEADER]] +; LE-MEMDEP: [[EXIT]]: +; LE-MEMDEP-NEXT: ret i32 [[L1]] +; +; LE-MEMSSA-LABEL: define i32 @phi_trans7( +; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; LE-MEMSSA: [[HEADER]]: +; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; LE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH]]), [label %latch] +; LE-MEMSSA: [[LATCH]]: +; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; LE-MEMSSA: [[EXIT]]: +; LE-MEMSSA-NEXT: ret i32 [[L1]] +; +; BE-MEMDEP-LABEL: define i32 @phi_trans7( +; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; BE-MEMDEP: [[HEADER]]: +; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; BE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH:.*]]), [label %latch] +; BE-MEMDEP: [[LATCH]]: +; BE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; BE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; 
BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: br label %[[HEADER]] +; BE-MEMDEP: [[EXIT]]: +; BE-MEMDEP-NEXT: ret i32 [[L1]] +; +; BE-MEMSSA-LABEL: define i32 @phi_trans7( +; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; BE-MEMSSA: [[HEADER]]: +; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; BE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans7, %[[LATCH]]), [label %latch] +; BE-MEMSSA: [[LATCH]]: +; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; BE-MEMSSA: [[EXIT]]: +; BE-MEMSSA-NEXT: ret i32 [[L1]] ; entry: %l0 = load i32, ptr %x @@ -847,24 +1690,81 @@ exit: ; FIXME: Currently we fail to translate the PHI in this case. 
define i32 @phi_trans8(ptr noalias nocapture readonly %x, i1 %cond) { -; CHECK-LABEL: @phi_trans8( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[L0:%.*]] = load i32, ptr [[X:%.*]], align 4 -; CHECK-NEXT: call void @use_i32(i32 [[L0]]) -; CHECK-NEXT: br label [[HEADER:%.*]] -; CHECK: header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 2, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_HEADER_CRIT_EDGE:%.*]] ] -; CHECK-NEXT: indirectbr ptr blockaddress(@phi_trans8, [[LATCH:%.*]]), [label %latch] -; CHECK: latch: -; CHECK-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 -; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] -; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 -; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT:%.*]], label [[LATCH_HEADER_CRIT_EDGE]] -; CHECK: latch.header_crit_edge: -; CHECK-NEXT: br label [[HEADER]] -; CHECK: exit: -; CHECK-NEXT: ret i32 [[L1]] +; LE-MEMDEP-LABEL: define i32 @phi_trans8( +; LE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; LE-MEMDEP: [[HEADER]]: +; LE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; LE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH:.*]]), [label %latch] +; LE-MEMDEP: [[LATCH]]: +; LE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; LE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; LE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; LE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; LE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: br label %[[HEADER]] +; LE-MEMDEP: [[EXIT]]: +; LE-MEMDEP-NEXT: ret i32 [[L1]] +; +; 
LE-MEMSSA-LABEL: define i32 @phi_trans8( +; LE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; LE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; LE-MEMSSA: [[HEADER]]: +; LE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; LE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH]]), [label %latch] +; LE-MEMSSA: [[LATCH]]: +; LE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; LE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; LE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; LE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; LE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; LE-MEMSSA: [[EXIT]]: +; LE-MEMSSA-NEXT: ret i32 [[L1]] +; +; BE-MEMDEP-LABEL: define i32 @phi_trans8( +; BE-MEMDEP-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMDEP-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMDEP-NEXT: br label %[[HEADER:.*]] +; BE-MEMDEP: [[HEADER]]: +; BE-MEMDEP-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH_HEADER_CRIT_EDGE:.*]] ] +; BE-MEMDEP-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH:.*]]), [label %latch] +; BE-MEMDEP: [[LATCH]]: +; BE-MEMDEP-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; BE-MEMDEP-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; BE-MEMDEP-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; BE-MEMDEP-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BE-MEMDEP-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LATCH_HEADER_CRIT_EDGE]] +; BE-MEMDEP: [[LATCH_HEADER_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: br label %[[HEADER]] +; BE-MEMDEP: [[EXIT]]: +; BE-MEMDEP-NEXT: ret i32 [[L1]] +; +; 
BE-MEMSSA-LABEL: define i32 @phi_trans8( +; BE-MEMSSA-SAME: ptr noalias readonly captures(none) [[X:%.*]], i1 [[COND:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: [[L0:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMSSA-NEXT: call void @use_i32(i32 [[L0]]) +; BE-MEMSSA-NEXT: br label %[[HEADER:.*]] +; BE-MEMSSA: [[HEADER]]: +; BE-MEMSSA-NEXT: [[IV:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; BE-MEMSSA-NEXT: indirectbr ptr blockaddress(@phi_trans8, %[[LATCH]]), [label %latch] +; BE-MEMSSA: [[LATCH]]: +; BE-MEMSSA-NEXT: [[OFFSET:%.*]] = add i32 [[IV]], -2 +; BE-MEMSSA-NEXT: [[GEP_1:%.*]] = getelementptr i32, ptr [[X]], i32 [[OFFSET]] +; BE-MEMSSA-NEXT: [[L1:%.*]] = load i32, ptr [[GEP_1]], align 4 +; BE-MEMSSA-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; BE-MEMSSA-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[HEADER]] +; BE-MEMSSA: [[EXIT]]: +; BE-MEMSSA-NEXT: ret i32 [[L1]] ; entry: %l0 = load i32, ptr %x @@ -890,11 +1790,35 @@ exit: ; PR6642 define i32 @memset_to_load() nounwind readnone { -; CHECK-LABEL: @memset_to_load( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[X:%.*]] = alloca [256 x i32], align 4 -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false) -; CHECK-NEXT: ret i32 0 +; LE-MEMDEP-LABEL: define i32 @memset_to_load( +; LE-MEMDEP-SAME: ) #[[ATTR2:[0-9]+]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: [[X:%.*]] = alloca [256 x i32], align 4 +; LE-MEMDEP-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false) +; LE-MEMDEP-NEXT: ret i32 0 +; +; LE-MEMSSA-LABEL: define i32 @memset_to_load( +; LE-MEMSSA-SAME: ) #[[ATTR2:[0-9]+]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: [[X:%.*]] = alloca [256 x i32], align 4 +; LE-MEMSSA-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false) +; LE-MEMSSA-NEXT: [[TTMP1:%.*]] = load i32, ptr [[X]], align 4 +; LE-MEMSSA-NEXT: ret i32 [[TTMP1]] +; +; BE-MEMDEP-LABEL: define i32 
@memset_to_load( +; BE-MEMDEP-SAME: ) #[[ATTR2:[0-9]+]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: [[X:%.*]] = alloca [256 x i32], align 4 +; BE-MEMDEP-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false) +; BE-MEMDEP-NEXT: ret i32 0 +; +; BE-MEMSSA-LABEL: define i32 @memset_to_load( +; BE-MEMSSA-SAME: ) #[[ATTR2:[0-9]+]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: [[X:%.*]] = alloca [256 x i32], align 4 +; BE-MEMSSA-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[X]], i8 0, i64 1024, i1 false) +; BE-MEMSSA-NEXT: [[TTMP1:%.*]] = load i32, ptr [[X]], align 4 +; BE-MEMSSA-NEXT: ret i32 [[TTMP1]] ; entry: %x = alloca [256 x i32], align 4 ; <ptr> [#uses=2] @@ -910,23 +1834,45 @@ entry: ;;===----------------------------------------------------------------------===;; define i32 @load_load_partial_alias(ptr %P) nounwind ssp { -; LE-LABEL: @load_load_partial_alias( -; LE-NEXT: entry: -; LE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4 -; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 8 -; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; LE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 -; LE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] -; LE-NEXT: ret i32 [[ADD]] -; -; BE-LABEL: @load_load_partial_alias( -; BE-NEXT: entry: -; BE-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P:%.*]], align 4 -; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 16 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 -; BE-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] -; BE-NEXT: ret i32 [[ADD]] +; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 8 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; LE-MEMDEP-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 +; LE-MEMDEP-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] +; LE-MEMDEP-NEXT: ret i32 [[ADD]] +; +; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 +; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ADD_PTR]], align 1 +; LE-MEMSSA-NEXT: [[CONV:%.*]] = zext i8 [[TTMP5]] to i32 +; LE-MEMSSA-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] +; LE-MEMSSA-NEXT: ret i32 [[ADD]] +; +; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[TTMP2]], 16 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; BE-MEMDEP-NEXT: [[CONV:%.*]] = zext i8 [[TMP1]] to i32 +; BE-MEMDEP-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] +; BE-MEMDEP-NEXT: ret i32 [[ADD]] +; +; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: [[TTMP2:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 +; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ADD_PTR]], align 1 +; BE-MEMSSA-NEXT: [[CONV:%.*]] = zext i8 [[TTMP5]] to i32 +; BE-MEMSSA-NEXT: [[ADD:%.*]] = add nsw i32 [[TTMP2]], [[CONV]] +; BE-MEMSSA-NEXT: ret i32 [[ADD]] ; entry: %ttmp2 = load i32, ptr %P @@ -940,31 +1886,61 @@ entry: ; Cross block partial alias case. 
define i32 @load_load_partial_alias_cross_block(ptr %P) nounwind ssp { -; LE-LABEL: @load_load_partial_alias_cross_block( -; LE-NEXT: entry: -; LE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4 -; LE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 -; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8 -; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; LE-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]] -; LE: land.lhs.true: -; LE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32 -; LE-NEXT: ret i32 [[CONV6]] -; LE: if.end: -; LE-NEXT: ret i32 52 -; -; BE-LABEL: @load_load_partial_alias_cross_block( -; BE-NEXT: entry: -; BE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4 -; BE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 -; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[IF_END:%.*]] -; BE: land.lhs.true: -; BE-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32 -; BE-NEXT: ret i32 [[CONV6]] -; BE: if.end: -; BE-NEXT: ret i32 52 +; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]] +; LE-MEMDEP: [[LAND_LHS_TRUE]]: +; LE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32 +; LE-MEMDEP-NEXT: ret i32 [[CONV6]] +; LE-MEMDEP: [[IF_END]]: +; LE-MEMDEP-NEXT: ret i32 52 +; +; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; 
LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]] +; LE-MEMSSA: [[LAND_LHS_TRUE]]: +; LE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 +; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; LE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; LE-MEMSSA-NEXT: ret i32 [[CONV6]] +; LE-MEMSSA: [[IF_END]]: +; LE-MEMSSA-NEXT: ret i32 52 +; +; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]] +; BE-MEMDEP: [[LAND_LHS_TRUE]]: +; BE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TMP1]] to i32 +; BE-MEMDEP-NEXT: ret i32 [[CONV6]] +; BE-MEMDEP: [[IF_END]]: +; BE-MEMDEP-NEXT: ret i32 52 +; +; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR0]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LAND_LHS_TRUE:.*]], label %[[IF_END:.*]] +; BE-MEMSSA: [[LAND_LHS_TRUE]]: +; BE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 1 +; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; BE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; BE-MEMSSA-NEXT: ret i32 [[CONV6]] +; BE-MEMSSA: [[IF_END]]: +; BE-MEMSSA-NEXT: ret i32 52 ; entry: %x1 = load i32, ptr %P, align 4 @@ -982,45 +1958,85 @@ if.end: } define i32 @load_load_partial_alias_cross_block_phi_trans(ptr %P) nounwind { -; LE-LABEL: @load_load_partial_alias_cross_block_phi_trans( -; LE-NEXT: entry: 
-; LE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4 -; LE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 -; LE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16 -; LE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; LE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8 -; LE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; LE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]] -; LE: if: -; LE-NEXT: br label [[JOIN:%.*]] -; LE: else: -; LE-NEXT: br label [[JOIN]] -; LE: join: -; LE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ] -; LE-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 -; LE-NEXT: ret i32 [[CONV6]] -; LE: if.end: -; LE-NEXT: ret i32 52 -; -; BE-LABEL: @load_load_partial_alias_cross_block_phi_trans( -; BE-NEXT: entry: -; BE-NEXT: [[X1:%.*]] = load i32, ptr [[P:%.*]], align 4 -; BE-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 -; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16 -; BE-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 -; BE-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE:%.*]] -; BE: if: -; BE-NEXT: br label [[JOIN:%.*]] -; BE: else: -; BE-NEXT: br label [[JOIN]] -; BE: join: -; BE-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], [[IF]] ], [ [[TMP1]], [[ELSE]] ] -; BE-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 -; BE-NEXT: ret i32 [[CONV6]] -; BE: if.end: -; BE-NEXT: ret i32 52 +; LE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR3]] { +; LE-MEMDEP-NEXT: [[ENTRY:.*:]] +; LE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 16 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; LE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 8 +; LE-MEMDEP-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 +; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] 
+; LE-MEMDEP: [[IF]]: +; LE-MEMDEP-NEXT: br label %[[JOIN:.*]] +; LE-MEMDEP: [[ELSE]]: +; LE-MEMDEP-NEXT: br label %[[JOIN]] +; LE-MEMDEP: [[JOIN]]: +; LE-MEMDEP-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], %[[IF]] ], [ [[TMP1]], %[[ELSE]] ] +; LE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; LE-MEMDEP-NEXT: ret i32 [[CONV6]] +; LE-MEMDEP: [[IF_END:.*:]] +; LE-MEMDEP-NEXT: ret i32 52 +; +; LE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR3]] { +; LE-MEMSSA-NEXT: [[ENTRY:.*:]] +; LE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; LE-MEMSSA: [[IF]]: +; LE-MEMSSA-NEXT: br label %[[JOIN:.*]] +; LE-MEMSSA: [[ELSE]]: +; LE-MEMSSA-NEXT: br label %[[JOIN]] +; LE-MEMSSA: [[JOIN]]: +; LE-MEMSSA-NEXT: [[IDX:%.*]] = phi i64 [ 1, %[[IF]] ], [ 2, %[[ELSE]] ] +; LE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]] +; LE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; LE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; LE-MEMSSA-NEXT: ret i32 [[CONV6]] +; LE-MEMSSA: [[IF_END:.*:]] +; LE-MEMSSA-NEXT: ret i32 52 +; +; BE-MEMDEP-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) #[[ATTR3]] { +; BE-MEMDEP-NEXT: [[ENTRY:.*:]] +; BE-MEMDEP-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[X1]], 8 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[X1]], 16 +; BE-MEMDEP-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i8 +; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; BE-MEMDEP: [[IF]]: +; BE-MEMDEP-NEXT: br label %[[JOIN:.*]] +; BE-MEMDEP: [[ELSE]]: +; BE-MEMDEP-NEXT: br label %[[JOIN]] +; BE-MEMDEP: 
[[JOIN]]: +; BE-MEMDEP-NEXT: [[TTMP5:%.*]] = phi i8 [ [[TMP3]], %[[IF]] ], [ [[TMP1]], %[[ELSE]] ] +; BE-MEMDEP-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; BE-MEMDEP-NEXT: ret i32 [[CONV6]] +; BE-MEMDEP: [[IF_END:.*:]] +; BE-MEMDEP-NEXT: ret i32 52 +; +; BE-MEMSSA-LABEL: define i32 @load_load_partial_alias_cross_block_phi_trans( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) #[[ATTR3]] { +; BE-MEMSSA-NEXT: [[ENTRY:.*:]] +; BE-MEMSSA-NEXT: [[X1:%.*]] = load i32, ptr [[P]], align 4 +; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp eq i32 [[X1]], 127 +; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; BE-MEMSSA: [[IF]]: +; BE-MEMSSA-NEXT: br label %[[JOIN:.*]] +; BE-MEMSSA: [[ELSE]]: +; BE-MEMSSA-NEXT: br label %[[JOIN]] +; BE-MEMSSA: [[JOIN]]: +; BE-MEMSSA-NEXT: [[IDX:%.*]] = phi i64 [ 1, %[[IF]] ], [ 2, %[[ELSE]] ] +; BE-MEMSSA-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 [[IDX]] +; BE-MEMSSA-NEXT: [[TTMP5:%.*]] = load i8, ptr [[ARRAYIDX4]], align 1 +; BE-MEMSSA-NEXT: [[CONV6:%.*]] = zext i8 [[TTMP5]] to i32 +; BE-MEMSSA-NEXT: ret i32 [[CONV6]] +; BE-MEMSSA: [[IF_END:.*:]] +; BE-MEMSSA-NEXT: ret i32 52 ; entry: %x1 = load i32, ptr %P, align 4 @@ -1047,58 +2063,104 @@ if.end: } define void @load_load_partial_alias_loop(ptr %P) { -; LE-LABEL: @load_load_partial_alias_loop( -; LE-NEXT: entry: -; LE-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 -; LE-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1 -; LE-NEXT: call void @use.i8(i8 [[V_1]]) -; LE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 -; LE-NEXT: call void @use.i32(i32 [[V_1_32]]) -; LE-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8 -; LE-NEXT: br label [[LOOP:%.*]] -; LE: loop: -; LE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] -; LE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] -; LE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] -; LE-NEXT: call void 
@use.i8(i8 [[V_I]]) -; LE-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 -; LE-NEXT: call void @use.i32(i32 [[V_I_32]]) -; LE-NEXT: [[I_INC]] = add i64 [[I]], 1 -; LE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 -; LE-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8 -; LE-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8 -; LE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]] -; LE: loop.loop_crit_edge: -; LE-NEXT: br label [[LOOP]] -; LE: exit: -; LE-NEXT: ret void -; -; BE-LABEL: @load_load_partial_alias_loop( -; BE-NEXT: entry: -; BE-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 -; BE-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1 -; BE-NEXT: call void @use.i8(i8 [[V_1]]) -; BE-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 -; BE-NEXT: call void @use.i32(i32 [[V_1_32]]) -; BE-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24 -; BE-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 -; BE-NEXT: br label [[LOOP:%.*]] -; BE: loop: -; BE-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ] -; BE-NEXT: [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ] -; BE-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] -; BE-NEXT: call void @use.i8(i8 [[V_I]]) -; BE-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 -; BE-NEXT: call void @use.i32(i32 [[V_I_32]]) -; BE-NEXT: [[I_INC]] = add i64 [[I]], 1 -; BE-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 -; BE-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16 -; BE-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8 -; BE-NEXT: br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]] -; BE: loop.loop_crit_edge: -; BE-NEXT: br label [[LOOP]] -; BE: exit: -; BE-NEXT: ret void +; LE-MEMDEP-LABEL: define void @load_load_partial_alias_loop( +; LE-MEMDEP-SAME: ptr [[P:%.*]]) { +; LE-MEMDEP-NEXT: [[ENTRY:.*]]: +; LE-MEMDEP-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; LE-MEMDEP-NEXT: [[V_1:%.*]] = load i8, ptr 
[[P_1]], align 1 +; LE-MEMDEP-NEXT: call void @use.i8(i8 [[V_1]]) +; LE-MEMDEP-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 +; LE-MEMDEP-NEXT: call void @use.i32(i32 [[V_1_32]]) +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8 +; LE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; LE-MEMDEP: [[LOOP]]: +; LE-MEMDEP-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[LOOP_LOOP_CRIT_EDGE:.*]] ] +; LE-MEMDEP-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_LOOP_CRIT_EDGE]] ] +; LE-MEMDEP-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; LE-MEMDEP-NEXT: call void @use.i8(i8 [[V_I]]) +; LE-MEMDEP-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 +; LE-MEMDEP-NEXT: call void @use.i32(i32 [[V_I_32]]) +; LE-MEMDEP-NEXT: [[I_INC]] = add i64 [[I]], 1 +; LE-MEMDEP-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8 +; LE-MEMDEP-NEXT: [[TMP2]] = trunc i32 [[TMP1]] to i8 +; LE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LOOP_LOOP_CRIT_EDGE]], label %[[EXIT:.*]] +; LE-MEMDEP: [[LOOP_LOOP_CRIT_EDGE]]: +; LE-MEMDEP-NEXT: br label %[[LOOP]] +; LE-MEMDEP: [[EXIT]]: +; LE-MEMDEP-NEXT: ret void +; +; LE-MEMSSA-LABEL: define void @load_load_partial_alias_loop( +; LE-MEMSSA-SAME: ptr [[P:%.*]]) { +; LE-MEMSSA-NEXT: [[ENTRY:.*]]: +; LE-MEMSSA-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; LE-MEMSSA-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1 +; LE-MEMSSA-NEXT: call void @use.i8(i8 [[V_1]]) +; LE-MEMSSA-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 +; LE-MEMSSA-NEXT: call void @use.i32(i32 [[V_1_32]]) +; LE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; LE-MEMSSA: [[LOOP]]: +; LE-MEMSSA-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP]] ] +; LE-MEMSSA-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; LE-MEMSSA-NEXT: [[V_I:%.*]] = load i8, ptr [[P_I]], align 1 +; LE-MEMSSA-NEXT: call void @use.i8(i8 [[V_I]]) +; LE-MEMSSA-NEXT: 
[[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 +; LE-MEMSSA-NEXT: call void @use.i32(i32 [[V_I_32]]) +; LE-MEMSSA-NEXT: [[I_INC]] = add i64 [[I]], 1 +; LE-MEMSSA-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 +; LE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; LE-MEMSSA: [[EXIT]]: +; LE-MEMSSA-NEXT: ret void +; +; BE-MEMDEP-LABEL: define void @load_load_partial_alias_loop( +; BE-MEMDEP-SAME: ptr [[P:%.*]]) { +; BE-MEMDEP-NEXT: [[ENTRY:.*]]: +; BE-MEMDEP-NEXT: [[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; BE-MEMDEP-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1 +; BE-MEMDEP-NEXT: call void @use.i8(i8 [[V_1]]) +; BE-MEMDEP-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 +; BE-MEMDEP-NEXT: call void @use.i32(i32 [[V_1_32]]) +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i32 [[V_1_32]], 24 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8 +; BE-MEMDEP-NEXT: br label %[[LOOP:.*]] +; BE-MEMDEP: [[LOOP]]: +; BE-MEMDEP-NEXT: [[V_I:%.*]] = phi i8 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[LOOP_LOOP_CRIT_EDGE:.*]] ] +; BE-MEMDEP-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP_LOOP_CRIT_EDGE]] ] +; BE-MEMDEP-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; BE-MEMDEP-NEXT: call void @use.i8(i8 [[V_I]]) +; BE-MEMDEP-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 +; BE-MEMDEP-NEXT: call void @use.i32(i32 [[V_I_32]]) +; BE-MEMDEP-NEXT: [[I_INC]] = add i64 [[I]], 1 +; BE-MEMDEP-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 +; BE-MEMDEP-NEXT: [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16 +; BE-MEMDEP-NEXT: [[TMP3]] = trunc i32 [[TMP2]] to i8 +; BE-MEMDEP-NEXT: br i1 [[CMP]], label %[[LOOP_LOOP_CRIT_EDGE]], label %[[EXIT:.*]] +; BE-MEMDEP: [[LOOP_LOOP_CRIT_EDGE]]: +; BE-MEMDEP-NEXT: br label %[[LOOP]] +; BE-MEMDEP: [[EXIT]]: +; BE-MEMDEP-NEXT: ret void +; +; BE-MEMSSA-LABEL: define void @load_load_partial_alias_loop( +; BE-MEMSSA-SAME: ptr [[P:%.*]]) { +; BE-MEMSSA-NEXT: [[ENTRY:.*]]: +; BE-MEMSSA-NEXT: 
[[P_1:%.*]] = getelementptr i8, ptr [[P]], i64 1 +; BE-MEMSSA-NEXT: [[V_1:%.*]] = load i8, ptr [[P_1]], align 1 +; BE-MEMSSA-NEXT: call void @use.i8(i8 [[V_1]]) +; BE-MEMSSA-NEXT: [[V_1_32:%.*]] = load i32, ptr [[P_1]], align 4 +; BE-MEMSSA-NEXT: call void @use.i32(i32 [[V_1_32]]) +; BE-MEMSSA-NEXT: br label %[[LOOP:.*]] +; BE-MEMSSA: [[LOOP]]: +; BE-MEMSSA-NEXT: [[I:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[I_INC:%.*]], %[[LOOP]] ] +; BE-MEMSSA-NEXT: [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]] +; BE-MEMSSA-NEXT: [[V_I:%.*]] = load i8, ptr [[P_I]], align 1 +; BE-MEMSSA-NEXT: call void @use.i8(i8 [[V_I]]) +; BE-MEMSSA-NEXT: [[V_I_32:%.*]] = load i32, ptr [[P_I]], align 4 +; BE-MEMSSA-NEXT: call void @use.i32(i32 [[V_I_32]]) +; BE-MEMSSA-NEXT: [[I_INC]] = add i64 [[I]], 1 +; BE-MEMSSA-NEXT: [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64 +; BE-MEMSSA-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; BE-MEMSSA: [[EXIT]]: +; BE-MEMSSA-NEXT: ret void ; entry: %P.1 = getelementptr i8, ptr %P, i64 1 @@ -1129,37 +2191,63 @@ declare void @use.i32(i32) readnone @global = external local_unnamed_addr global i8, align 4 define void @load_load_partial_alias_atomic(ptr %arg) { -; LE-LABEL: @load_load_partial_alias_atomic( -; LE-NEXT: bb: -; LE-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1 -; LE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 -; LE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 -; LE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 8 -; LE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 -; LE-NEXT: br label [[BB5:%.*]] -; LE: bb5: -; LE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ] -; LE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 -; LE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] -; LE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 -; LE-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4 -; LE-NEXT: br label [[BB5]] -; -; 
BE-LABEL: @load_load_partial_alias_atomic( -; BE-NEXT: bb: -; BE-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 1 -; BE-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 -; BE-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 -; BE-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 48 -; BE-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 -; BE-NEXT: br label [[BB5:%.*]] -; BE: bb5: -; BE-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], [[BB5]] ], [ [[TMP1]], [[BB:%.*]] ] -; BE-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 -; BE-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] -; BE-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 -; BE-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4 -; BE-NEXT: br label [[BB5]] +; LE-MEMDEP-LABEL: define void @load_load_partial_alias_atomic( +; LE-MEMDEP-SAME: ptr [[ARG:%.*]]) { +; LE-MEMDEP-NEXT: [[BB:.*]]: +; LE-MEMDEP-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1 +; LE-MEMDEP-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 +; LE-MEMDEP-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; LE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 8 +; LE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 +; LE-MEMDEP-NEXT: br label %[[BB5:.*]] +; LE-MEMDEP: [[BB5]]: +; LE-MEMDEP-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], %[[BB5]] ], [ [[TMP1]], %[[BB]] ] +; LE-MEMDEP-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 +; LE-MEMDEP-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] +; LE-MEMDEP-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 +; LE-MEMDEP-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4 +; LE-MEMDEP-NEXT: br label %[[BB5]] +; +; LE-MEMSSA-LABEL: define void @load_load_partial_alias_atomic( +; LE-MEMSSA-SAME: ptr [[ARG:%.*]]) { +; LE-MEMSSA-NEXT: [[BB:.*:]] +; LE-MEMSSA-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; LE-MEMSSA-NEXT: br 
label %[[BB5:.*]] +; LE-MEMSSA: [[BB5]]: +; LE-MEMSSA-NEXT: [[TMP4_1:%.*]] = load i8, ptr [[TMP3_1]], align 4 +; LE-MEMSSA-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 +; LE-MEMSSA-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] +; LE-MEMSSA-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 +; LE-MEMSSA-NEXT: br label %[[BB5]] +; +; BE-MEMDEP-LABEL: define void @load_load_partial_alias_atomic( +; BE-MEMDEP-SAME: ptr [[ARG:%.*]]) { +; BE-MEMDEP-NEXT: [[BB:.*]]: +; BE-MEMDEP-NEXT: [[TMP2_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 1 +; BE-MEMDEP-NEXT: [[TMP2_3:%.*]] = load i64, ptr [[TMP2_1]], align 4 +; BE-MEMDEP-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; BE-MEMDEP-NEXT: [[TMP0:%.*]] = lshr i64 [[TMP2_3]], 48 +; BE-MEMDEP-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i8 +; BE-MEMDEP-NEXT: br label %[[BB5:.*]] +; BE-MEMDEP: [[BB5]]: +; BE-MEMDEP-NEXT: [[TMP4_1:%.*]] = phi i8 [ [[TMP4_1_PRE:%.*]], %[[BB5]] ], [ [[TMP1]], %[[BB]] ] +; BE-MEMDEP-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 +; BE-MEMDEP-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] +; BE-MEMDEP-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 +; BE-MEMDEP-NEXT: [[TMP4_1_PRE]] = load i8, ptr [[TMP3_1]], align 4 +; BE-MEMDEP-NEXT: br label %[[BB5]] +; +; BE-MEMSSA-LABEL: define void @load_load_partial_alias_atomic( +; BE-MEMSSA-SAME: ptr [[ARG:%.*]]) { +; BE-MEMSSA-NEXT: [[BB:.*:]] +; BE-MEMSSA-NEXT: [[TMP3_1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 2 +; BE-MEMSSA-NEXT: br label %[[BB5:.*]] +; BE-MEMSSA: [[BB5]]: +; BE-MEMSSA-NEXT: [[TMP4_1:%.*]] = load i8, ptr [[TMP3_1]], align 4 +; BE-MEMSSA-NEXT: [[TMP6_1:%.*]] = load atomic i8, ptr @global acquire, align 4 +; BE-MEMSSA-NEXT: [[TMP7_1:%.*]] = add i8 [[TMP6_1]], [[TMP4_1]] +; BE-MEMSSA-NEXT: store i8 [[TMP7_1]], ptr [[ARG]], align 1 +; BE-MEMSSA-NEXT: br label %[[BB5]] ; bb: %tmp2.1 = getelementptr inbounds i8, ptr %arg, i64 1 @@ -1188,8 +2276,9 @@ bb5: 
; preds = %bb14, %bb @f = global %widening1 zeroinitializer, align 4 define i32 @test_widening1(ptr %P) nounwind ssp noredzone { -; CHECK-LABEL: @test_widening1( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test_widening1( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TTMP:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1:%.*]], ptr @f, i64 0, i32 1), align 4 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32 ; CHECK-NEXT: [[TTMP1:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1]], ptr @f, i64 0, i32 2), align 1 @@ -1207,8 +2296,9 @@ entry: } define i32 @test_widening2() nounwind ssp noredzone { -; CHECK-LABEL: @test_widening2( -; CHECK-NEXT: entry: +; CHECK-LABEL: define i32 @test_widening2( +; CHECK-SAME: ) #[[ATTR5]] { +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TTMP:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1:%.*]], ptr @f, i64 0, i32 1), align 4 ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TTMP]] to i32 ; CHECK-NEXT: [[TTMP1:%.*]] = load i8, ptr getelementptr inbounds ([[WIDENING1]], ptr @f, i64 0, i32 2), align 1 @@ -1262,12 +2352,39 @@ declare void @use3(ptr, ptr) ; PR8908 define void @test_escape1() nounwind { -; CHECK-LABEL: @test_escape1( -; CHECK-NEXT: [[X:%.*]] = alloca ptr, align 8 -; CHECK-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8 -; CHECK-NEXT: call void @use() #[[ATTR3]] -; CHECK-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) #[[ATTR3]] -; CHECK-NEXT: ret void +; LE-MEMDEP-LABEL: define void @test_escape1( +; LE-MEMDEP-SAME: ) #[[ATTR3]] { +; LE-MEMDEP-NEXT: [[X:%.*]] = alloca ptr, align 8 +; LE-MEMDEP-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8 +; LE-MEMDEP-NEXT: call void @use() #[[ATTR3]] +; LE-MEMDEP-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) 
#[[ATTR3]] +; LE-MEMDEP-NEXT: ret void +; +; LE-MEMSSA-LABEL: define void @test_escape1( +; LE-MEMSSA-SAME: ) #[[ATTR3]] { +; LE-MEMSSA-NEXT: [[X:%.*]] = alloca ptr, align 8 +; LE-MEMSSA-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8 +; LE-MEMSSA-NEXT: call void @use() #[[ATTR3]] +; LE-MEMSSA-NEXT: [[DEAD:%.*]] = load ptr, ptr [[X]], align 8 +; LE-MEMSSA-NEXT: call void @use3(ptr [[X]], ptr [[DEAD]]) #[[ATTR3]] +; LE-MEMSSA-NEXT: ret void +; +; BE-MEMDEP-LABEL: define void @test_escape1( +; BE-MEMDEP-SAME: ) #[[ATTR3]] { +; BE-MEMDEP-NEXT: [[X:%.*]] = alloca ptr, align 8 +; BE-MEMDEP-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8 +; BE-MEMDEP-NEXT: call void @use() #[[ATTR3]] +; BE-MEMDEP-NEXT: call void @use3(ptr [[X]], ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2)) #[[ATTR3]] +; BE-MEMDEP-NEXT: ret void +; +; BE-MEMSSA-LABEL: define void @test_escape1( +; BE-MEMSSA-SAME: ) #[[ATTR3]] { +; BE-MEMSSA-NEXT: [[X:%.*]] = alloca ptr, align 8 +; BE-MEMSSA-NEXT: store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr [[X]], align 8 +; BE-MEMSSA-NEXT: call void @use() #[[ATTR3]] +; BE-MEMSSA-NEXT: [[DEAD:%.*]] = load ptr, ptr [[X]], align 8 +; BE-MEMSSA-NEXT: call void @use3(ptr [[X]], ptr [[DEAD]]) #[[ATTR3]] +; BE-MEMSSA-NEXT: ret void ; %x = alloca ptr, align 8 store ptr getelementptr inbounds ([5 x ptr], ptr @_ZTV1X, i64 0, i64 2), ptr %x, align 8 @@ -1276,3 +2393,6 @@ define void @test_escape1() nounwind { call void @use3(ptr %x, ptr %DEAD) nounwind ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; BE: {{.*}} +; LE: {{.*}} diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll index b57a45f..8a608a1 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs-low-threshold.ll @@ -7,10 +7,10 @@ target triple = "aarch64" declare void @streaming_compatible_f() #0 "aarch64_pstate_sm_compatible" -; Function @streaming_callee doesn't contain any operations that may use ZA +; Function @non_streaming_callee doesn't contain any operations that may use ZA ; state and therefore can be legally inlined into a normal function. -define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_callee +define void @non_streaming_callee() #0 { +; CHECK-LABEL: define void @non_streaming_callee ; CHECK-SAME: () #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: call void @streaming_compatible_f() ; CHECK-NEXT: call void @streaming_compatible_f() @@ -21,26 +21,26 @@ define void @streaming_callee() #0 "aarch64_pstate_sm_enabled" { ret void } -; Inline call to @streaming_callee to remove a streaming mode change. -define void @non_streaming_caller_inline() #0 { -; CHECK-LABEL: define void @non_streaming_caller_inline +; Inline call to @non_streaming_callee to remove a streaming mode change. +define void @streaming_caller_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_inline ; CHECK-SAME: () #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: call void @streaming_compatible_f() ; CHECK-NEXT: call void @streaming_compatible_f() ; CHECK-NEXT: ret void ; - call void @streaming_callee() + call void @non_streaming_callee() ret void } -; Don't inline call to @streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change. 
-define void @streaming_caller_dont_inline() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_dont_inline +; Don't inline call to @non_streaming_callee when the inline-threshold is set to 1, because it does not eliminate a streaming-mode change. +define void @non_streaming_caller_dont_inline() #0 { +; CHECK-LABEL: define void @non_streaming_caller_dont_inline ; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: call void @streaming_callee() +; CHECK-NEXT: call void @non_streaming_callee() ; CHECK-NEXT: ret void ; - call void @streaming_callee() + call void @non_streaming_callee() ret void } diff --git a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll index 6cb1692..077a3aa 100644 --- a/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll +++ b/llvm/test/Transforms/Inline/AArch64/sme-pstatesm-attrs.ll @@ -86,7 +86,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_normal_callee_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_normal_callee_inline -; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -103,7 +103,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -120,7 +120,7 @@ entry: ; [ ] N -> SC + B define i32 @normal_caller_streaming_compatible_callee_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -137,7 +137,7 @@ entry: ; [ ] N -> SC + 
B define i32 @normal_caller_locally_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -154,7 +154,7 @@ entry: ; [x] N -> SC + B define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 { ; CHECK-LABEL: define i32 @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -171,7 +171,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -188,7 +188,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -205,7 +205,7 @@ entry: ; [ ] S -> SC + B define i32 @streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -222,7 +222,7 @@ entry: ; [ ] S -> SC + B define i32 
@streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -239,7 +239,7 @@ entry: ; [x] S -> SC + B define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i32 @streaming_caller_streaming_compatible_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -256,7 +256,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -273,7 +273,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -290,7 +290,7 @@ entry: ; [ ] N + B -> SC + B define i32 @locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -307,7 +307,7 @@ entry: ; [ ] 
N + B -> SC + B define i32 @locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -324,7 +324,7 @@ entry: ; [x] N + B -> SC + B define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR8]] { +; CHECK-SAME: () #[[ATTR3]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -341,7 +341,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR9:[0-9]+]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -358,7 +358,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -375,7 +375,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = 
call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -392,7 +392,7 @@ entry: ; [ ] SC -> SC + B define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -409,7 +409,7 @@ entry: ; [x] SC -> SC + B define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: define i32 @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline -; CHECK-SAME: () #[[ATTR9]] { +; CHECK-SAME: () #[[ATTR4]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @streaming_compatible_locally_streaming_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -425,7 +425,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline -; CHECK-SAME: () #[[ATTR10:[0-9]+]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i32 @normal_callee() ; CHECK-NEXT: ret i32 [[RES]] @@ -442,7 +442,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -459,7 +459,7 @@ entry: ; [ ] SC + B -> SC + B define i32 
@streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -476,7 +476,7 @@ entry: ; [ ] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -493,7 +493,7 @@ entry: ; [x] SC + B -> SC + B define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline() #0 "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { ; CHECK-LABEL: define i32 @streaming_compatible_locally_streaming_caller_and_callee_inline -; CHECK-SAME: () #[[ATTR10]] { +; CHECK-SAME: () #[[ATTR5]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES_I:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: ret i32 [[RES_I]] @@ -505,7 +505,7 @@ entry: define void @normal_callee_with_inlineasm() #0 { ; CHECK-LABEL: define void @normal_callee_with_inlineasm -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void asm sideeffect " ; CHECK-NEXT: ret void @@ -517,7 +517,7 @@ entry: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define void @streaming_caller_normal_callee_with_inlineasm_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void 
@normal_callee_with_inlineasm() ; CHECK-NEXT: ret void @@ -529,7 +529,7 @@ entry: define i64 @normal_callee_with_intrinsic_call() #0 { ; CHECK-LABEL: define i64 @normal_callee_with_intrinsic_call -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 @llvm.aarch64.sve.cntb(i32 4) ; CHECK-NEXT: ret i64 [[RES]] @@ -541,7 +541,7 @@ entry: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_with_intrinsic_call_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 @normal_callee_with_intrinsic_call() ; CHECK-NEXT: ret i64 [[RES]] @@ -555,7 +555,7 @@ declare i64 @llvm.aarch64.sve.cntb(i32) define i64 @normal_callee_call_sme_state() #0 { ; CHECK-LABEL: define i64 @normal_callee_call_sme_state -; CHECK-SAME: () #[[ATTR6]] { +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call { i64, i64 } @__arm_sme_state() ; CHECK-NEXT: [[RES_0:%.*]] = extractvalue { i64, i64 } [[RES]], 0 @@ -571,7 +571,7 @@ declare {i64, i64} @__arm_sme_state() define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline() #0 "aarch64_pstate_sm_enabled" { ; CHECK-LABEL: define i64 @streaming_caller_normal_callee_call_sme_state_dont_inline -; CHECK-SAME: () #[[ATTR7]] { +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[RES:%.*]] = call i64 @normal_callee_call_sme_state() ; CHECK-NEXT: ret i64 [[RES]] @@ -583,57 +583,57 @@ entry: -declare void @streaming_body() "aarch64_pstate_sm_enabled" +declare void @nonstreaming_body() -define void @streaming_caller_single_streaming_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_single_streaming_callee -; CHECK-SAME: () #[[ATTR7]] { -; CHECK-NEXT: call void @streaming_body() +define void 
@nonstreaming_caller_single_nonstreaming_callee() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_single_nonstreaming_callee +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_body() + call void @nonstreaming_body() ret void } -define void @streaming_caller_multiple_streaming_callees() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees -; CHECK-SAME: () #[[ATTR7]] { -; CHECK-NEXT: call void @streaming_body() -; CHECK-NEXT: call void @streaming_body() +define void @nonstreaming_caller_multiple_nonstreaming_callees() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_multiple_nonstreaming_callees +; CHECK-SAME: () #[[ATTR1]] { +; CHECK-NEXT: call void @nonstreaming_body() +; CHECK-NEXT: call void @nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_body() - call void @streaming_body() + call void @nonstreaming_body() + call void @nonstreaming_body() ret void } ; Allow inlining, as inline it would not increase the number of streaming-mode changes. -define void @streaming_caller_single_streaming_callee_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_single_streaming_callee_inline -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @streaming_body() +define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_nonstreaming_callee_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @nonstreaming_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_single_streaming_callee() + call void @nonstreaming_caller_single_nonstreaming_callee() ret void } -; Prevent inlining, as inline it would lead to multiple streaming-mode changes. 
-define void @streaming_caller_multiple_streaming_callees_dont_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_callees_dont_inline -; CHECK-SAME: () #[[ATTR6]] { -; CHECK-NEXT: call void @streaming_caller_multiple_streaming_callees() +; Prevent inlining, as inlining it would lead to multiple streaming-mode changes. +define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline +; CHECK-SAME: () #[[ATTR2]] { +; CHECK-NEXT: call void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() ; CHECK-NEXT: ret void ; - call void @streaming_caller_multiple_streaming_callees() + call void @streaming_caller_to_nonstreaming_callee_with_multiple_nonstreaming_callees_dont_inline() ret void } declare void @streaming_compatible_body() "aarch64_pstate_sm_compatible" -define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee -; CHECK-SAME: () #[[ATTR7]] { +define void @nonstreaming_caller_single_streaming_compatible_callee() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_single_streaming_compatible_callee +; CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; @@ -641,9 +641,9 @@ define void @streaming_caller_single_streaming_compatible_callee() #0 "aarch64_ ret void } -define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch64_pstate_sm_enabled" { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees -; CHECK-SAME: () #[[ATTR7]] { +define void @nonstreaming_caller_multiple_streaming_compatible_callees() #0 { +; CHECK-LABEL: define void @nonstreaming_caller_multiple_streaming_compatible_callees +; 
CHECK-SAME: () #[[ATTR1]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void @@ -654,25 +654,67 @@ define void @streaming_caller_multiple_streaming_compatible_callees() #0 "aarch } ; Allow inlining, as inline would remove a streaming-mode change. -define void @streaming_caller_single_streaming_compatible_callee_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_single_streaming_compatible_callee_inline -; CHECK-SAME: () #[[ATTR6]] { +define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_single_streamingcompatible_callee_inline +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_single_streaming_compatible_callee() + call void @nonstreaming_caller_single_streaming_compatible_callee() ret void } -; Allow inlining, as inline would remove several stremaing-mode changes. -define void @streaming_caller_multiple_streaming_compatible_callees_inline() #0 { -; CHECK-LABEL: define void @streaming_caller_multiple_streaming_compatible_callees_inline -; CHECK-SAME: () #[[ATTR6]] { +; Allow inlining, as inline would remove several streaming-mode changes. 
+define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline() #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @streaming_caller_to_nonstreaming_callee_with_multiple_streamingcompatible_callees_inline +; CHECK-SAME: () #[[ATTR2]] { ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: call void @streaming_compatible_body() ; CHECK-NEXT: ret void ; - call void @streaming_caller_multiple_streaming_compatible_callees() + call void @nonstreaming_caller_multiple_streaming_compatible_callees() + ret void +} + +define void @simple_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: define void @simple_streaming_function +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + store <vscale x 4 x i32> zeroinitializer, ptr %ptr + ret void +} + +; Don't allow inlining a streaming function into a non-streaming function. +define void @non_streaming_caller_streaming_callee_dont_inline(ptr %ptr) #0 { +; CHECK-LABEL: define void @non_streaming_caller_streaming_callee_dont_inline +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @simple_streaming_function(ptr [[PTR]]) +; CHECK-NEXT: ret void +; + call void @simple_streaming_function(ptr %ptr) + ret void +} + +define void @simple_locally_streaming_function(ptr %ptr) #0 "aarch64_pstate_sm_body" { +; CHECK-LABEL: define void @simple_locally_streaming_function +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[PTR]], align 16 +; CHECK-NEXT: ret void +; + store <vscale x 4 x i32> zeroinitializer, ptr %ptr + ret void +} + +; Don't allow inlining a locally-streaming function into a non-streaming function. 
+define void @non_streaming_caller_locally_streaming_callee_dont_inline(ptr %ptr) #0 { +; CHECK-LABEL: define void @non_streaming_caller_locally_streaming_callee_dont_inline +; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: call void @simple_locally_streaming_function(ptr [[PTR]]) +; CHECK-NEXT: ret void +; + call void @simple_locally_streaming_function(ptr %ptr) ret void } diff --git a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll index a92e0c2..e2f22b8 100644 --- a/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll +++ b/llvm/test/Transforms/InstCombine/gepofconstgepi8.ll @@ -293,3 +293,183 @@ entry: %p2 = getelementptr <vscale x 4 x i32>, ptr %p1, i64 %index ret ptr %p2 } + +define ptr @test_all_nuw(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_nuw( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_partial_nuw1(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_partial_nuw1( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_partial_nuw2(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_partial_nuw2( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 
9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nuw i8, ptr %base, i64 1 + %index = add i64 %a, 2 + %p2 = getelementptr nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_partial_nuw3(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_partial_nuw3( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_nuw_disjoint(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_nuw_disjoint( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nuw i8, ptr %base, i64 1 + %index = or disjoint i64 %a, 2 + %p2 = getelementptr nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_inbounds_nuw(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_inbounds_nuw( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_partial_inbounds1(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_partial_inbounds1( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; 
CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_partial_inbounds2(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_partial_inbounds2( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_inbounds_partial_nuw1(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw1( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 7 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr inbounds i8, ptr %base, i64 -1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_inbounds_partial_nuw2(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw2( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1 + %index = add nuw i64 %a, 2 + %p2 = getelementptr inbounds i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr 
@test_all_inbounds_partial_nuw3(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_inbounds_partial_nuw3( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr inbounds nuw i8, ptr %base, i64 1 + %index = add i64 %a, 2 + %p2 = getelementptr inbounds nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} + +define ptr @test_all_nusw_nuw(ptr %base, i64 %a) { +; CHECK-LABEL: define ptr @test_all_nusw_nuw( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nuw i8, ptr [[BASE]], i64 9 +; CHECK-NEXT: [[P2:%.*]] = getelementptr nuw i32, ptr [[TMP0]], i64 [[A]] +; CHECK-NEXT: ret ptr [[P2]] +; +entry: + %p1 = getelementptr nusw nuw i8, ptr %base, i64 1 + %index = add nsw nuw i64 %a, 2 + %p2 = getelementptr nusw nuw i32, ptr %p1, i64 %index + ret ptr %p2 +} diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index d9f729e..3454835 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -3025,3 +3025,31 @@ join: %umax = call noundef i32 @llvm.umax(i32 noundef %phi, i32 1) ret i32 %umax } + +define i32 @multiple_intrinsics_with_multiple_phi_uses(i1 %c, i32 %arg) { +; CHECK-LABEL: @multiple_intrinsics_with_multiple_phi_uses( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[IF_END:%.*]] +; CHECK: if: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[ARG:%.*]], -8 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.fshl.i32(i32 [[ADD]], i32 [[ADD]], i32 29) +; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[TMP0]], 1 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[TMP1]], [[IF]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; +entry: + br i1 %c, label %if, 
label %if.end + +if: + %add = add i32 %arg, -8 + br label %if.end + +if.end: + %phi = phi i32 [ %add, %if ], [ 0, %entry ] + %fshl1 = call i32 @llvm.fshl.i32(i32 %phi, i32 %phi, i32 29) + %fshl2 = call i32 @llvm.fshl.i32(i32 %phi, i32 %phi, i32 29) + %add2 = add i32 %fshl1, %fshl2 + ret i32 %add2 +} diff --git a/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll new file mode 100644 index 0000000..1e089e1 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/scalable-extract-subvec-elt.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s + +define i1 @extract_const_idx(<vscale x 4 x i1> %a) { +; CHECK-LABEL: define i1 @extract_const_idx( +; CHECK-SAME: <vscale x 4 x i1> [[A:%.*]]) { +; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 4 x i1> [[A]], i64 1 +; CHECK-NEXT: ret i1 [[ELT]] +; + %subvec = call <vscale x 2 x i1> @llvm.vector.extract.nxv2i1.nxv4i1.i64(<vscale x 4 x i1> %a, i64 0) + %elt = extractelement <vscale x 2 x i1> %subvec, i32 1 + ret i1 %elt +} + +define float @extract_variable_idx(<vscale x 4 x float> %a, i32 %idx) { +; CHECK-LABEL: define float @extract_variable_idx( +; CHECK-SAME: <vscale x 4 x float> [[A:%.*]], i32 [[IDX:%.*]]) { +; CHECK-NEXT: [[ELT:%.*]] = extractelement <vscale x 4 x float> [[A]], i32 [[IDX]] +; CHECK-NEXT: ret float [[ELT]] +; + %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 0) + %elt = extractelement <vscale x 2 x float> %subvec, i32 %idx + ret float %elt +} + +define float @negative_test(<vscale x 4 x float> %a) { +; CHECK-LABEL: define float @negative_test( +; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) { +; CHECK-NEXT: [[SUBVEC:%.*]] = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32(<vscale x 4 x float> [[A]], i64 2) +; CHECK-NEXT: [[ELT:%.*]] = 
extractelement <vscale x 2 x float> [[SUBVEC]], i64 1 +; CHECK-NEXT: ret float [[ELT]] +; + %subvec = call <vscale x 2 x float> @llvm.vector.extract.nxv2f32.nxv4f32.i64(<vscale x 4 x float> %a, i64 2) + %elt = extractelement <vscale x 2 x float> %subvec, i32 1 + ret float %elt +} diff --git a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll index 75b8509..6eed7f8 100644 --- a/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/const-fold-nvvm-unary-arithmetic.ll @@ -416,6 +416,54 @@ define float @test_round_ftz_f_neg_1_5() { ret float %res } +define double @test_round_d_2_5() { +; CHECK-LABEL: define double @test_round_d_2_5() { +; CHECK-NEXT: ret double 2.000000e+00 +; + %res = call double @llvm.nvvm.round.d(double 2.5) + ret double %res +} + +define float @test_round_f_2_5() { +; CHECK-LABEL: define float @test_round_f_2_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.round.f(float 2.5) + ret float %res +} + +define float @test_round_ftz_f_2_5() { +; CHECK-LABEL: define float @test_round_ftz_f_2_5() { +; CHECK-NEXT: ret float 2.000000e+00 +; + %res = call float @llvm.nvvm.round.ftz.f(float 2.5) + ret float %res +} + +define double @test_round_d_neg_2_5() { +; CHECK-LABEL: define double @test_round_d_neg_2_5() { +; CHECK-NEXT: ret double -2.000000e+00 +; + %res = call double @llvm.nvvm.round.d(double -2.5) + ret double %res +} + +define float @test_round_f_neg_2_5() { +; CHECK-LABEL: define float @test_round_f_neg_2_5() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.round.f(float -2.5) + ret float %res +} + +define float @test_round_ftz_f_neg_2_5() { +; CHECK-LABEL: define float @test_round_ftz_f_neg_2_5() { +; CHECK-NEXT: ret float -2.000000e+00 +; + %res = call float @llvm.nvvm.round.ftz.f(float -2.5) + ret float %res +} + define double 
@test_round_d_neg_subnorm() { ; CHECK-LABEL: define double @test_round_d_neg_subnorm() { ; CHECK-NEXT: ret double -0.000000e+00 diff --git a/llvm/test/Transforms/LICM/gep-reassociate.ll b/llvm/test/Transforms/LICM/gep-reassociate.ll index 630a751..0090c76 100644 --- a/llvm/test/Transforms/LICM/gep-reassociate.ll +++ b/llvm/test/Transforms/LICM/gep-reassociate.ll @@ -39,11 +39,13 @@ exit: ret void } -define void @both_inbounds_one_neg(ptr %ptr, i1 %c) { +define void @both_inbounds_one_neg(ptr %ptr, i1 %c, i64 %neg) { ; CHECK-LABEL: define void @both_inbounds_one_neg -; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NEG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 -1 +; CHECK-NEXT: [[IS_NEG:%.*]] = icmp slt i64 [[NEG]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NEG]]) +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[NEG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() @@ -55,13 +57,15 @@ define void @both_inbounds_one_neg(ptr %ptr, i1 %c) { ; CHECK-NEXT: ret void ; entry: + %is.neg = icmp slt i64 %neg, 0 + call void @llvm.assume(i1 %is.neg) br label %loop loop: %val = call i32 @get.i32() %val.ext = zext i32 %val to i64 %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext - %ptr3 = getelementptr i8, ptr %ptr2, i64 -1 + %ptr3 = getelementptr i8, ptr %ptr2, i64 %neg call void @use(ptr %ptr3) br i1 %c, label %loop, label %exit @@ -69,11 +73,13 @@ exit: ret void } -define void @both_inbounds_pos(ptr %ptr, i1 %c) { +define void @both_inbounds_pos(ptr %ptr, i1 %c, i64 %nonneg) { ; CHECK-LABEL: define void @both_inbounds_pos -; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]], i64 [[NONNEG:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 +; CHECK-NEXT: [[IS_NONNEG:%.*]] 
= icmp sge i64 [[NONNEG]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NONNEG]]) +; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 [[NONNEG]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[VAL:%.*]] = call i32 @get.i32() @@ -85,13 +91,15 @@ define void @both_inbounds_pos(ptr %ptr, i1 %c) { ; CHECK-NEXT: ret void ; entry: + %is.nonneg = icmp sge i64 %nonneg, 0 + call void @llvm.assume(i1 %is.nonneg) br label %loop loop: %val = call i32 @get.i32() %val.ext = zext i32 %val to i64 %ptr2 = getelementptr inbounds i8, ptr %ptr, i64 %val.ext - %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 1 + %ptr3 = getelementptr inbounds i8, ptr %ptr2, i64 %nonneg call void @use(ptr %ptr3) br i1 %c, label %loop, label %exit @@ -440,3 +448,32 @@ latch: exit: ret void } + +; Do not reassociate constant offset GEP. +define void @constant_offset(ptr %ptr, i1 %c) { +; CHECK-LABEL: define void @constant_offset +; CHECK-SAME: (ptr [[PTR:%.*]], i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[VAL:%.*]] = call i64 @get.i64() +; CHECK-NEXT: [[GEP_BASE:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[VAL]] +; CHECK-NEXT: [[GEP_OFF:%.*]] = getelementptr i8, ptr [[GEP_BASE]], i64 1 +; CHECK-NEXT: call void @use(ptr [[GEP_OFF]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %val = call i64 @get.i64() + %gep.base = getelementptr i8, ptr %ptr, i64 %val + %gep.off = getelementptr i8, ptr %gep.base, i64 1 + call void @use(ptr %gep.off) + br i1 %c, label %loop, label %exit + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll index aa954aeb..9003072 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll +++ 
b/llvm/test/Transforms/LoopStrengthReduce/AArch64/vscale-fixups.ll @@ -383,14 +383,14 @@ define void @vscale_squared_offset(ptr %alloc) #0 { ; COMMON-LABEL: vscale_squared_offset: ; COMMON: // %bb.0: // %entry ; COMMON-NEXT: rdvl x9, #1 +; COMMON-NEXT: rdvl x10, #4 ; COMMON-NEXT: fmov z0.s, #4.00000000 -; COMMON-NEXT: mov x8, xzr ; COMMON-NEXT: lsr x9, x9, #4 ; COMMON-NEXT: fmov z1.s, #8.00000000 -; COMMON-NEXT: cntw x10 +; COMMON-NEXT: mov x8, xzr ; COMMON-NEXT: ptrue p0.s, vl1 -; COMMON-NEXT: umull x9, w9, w9 -; COMMON-NEXT: lsl x9, x9, #6 +; COMMON-NEXT: umull x9, w9, w10 +; COMMON-NEXT: cntw x10 ; COMMON-NEXT: cmp x8, x10 ; COMMON-NEXT: b.ge .LBB6_2 ; COMMON-NEXT: .LBB6_1: // %for.body diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll index 953dc278..dd2913d 100644 --- a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll +++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll @@ -444,3 +444,158 @@ loop: exit: ret i64 %rdx.next } + +define void @reduction_with_intermediate_store(ptr %src, ptr %sum) { +; CHECK-LABEL: define void @reduction_with_intermediate_store( +; CHECK-SAME: ptr [[SRC:%.*]], ptr [[SUM:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[SUM_PROMOTED:%.*]] = load i32, ptr [[SUM]], align 4 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[SUM_PROMOTED]], %[[ENTRY]] ], [ [[RED_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4 +; CHECK-NEXT: [[RED_NEXT:%.*]] = add nsw i32 [[RED]], [[L]] +; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds nuw i32, ptr 
[[SRC]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 +; CHECK-NEXT: [[RED_NEXT_1:%.*]] = add nsw i32 [[RED_NEXT]], [[L_1]] +; CHECK-NEXT: store i32 [[RED_NEXT_1]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 +; CHECK-NEXT: [[RED_NEXT_2:%.*]] = add nsw i32 [[RED_NEXT_1]], [[L_2]] +; CHECK-NEXT: store i32 [[RED_NEXT_2]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds nuw i32, ptr [[SRC]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_SRC_3]], align 4 +; CHECK-NEXT: [[RED_NEXT_3]] = add nsw i32 [[RED_NEXT_2]], [[L_3]] +; CHECK-NEXT: store i32 [[RED_NEXT_3]], ptr [[SUM]], align 4 +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 10000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %sum.promoted = load i32, ptr %sum, align 4 + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi i32 [ %sum.promoted, %entry ], [ %red.next, %loop ] + %gep.src = getelementptr inbounds nuw i32, ptr %src, i64 %iv + %l = load i32, ptr %gep.src, align 4 + %red.next = add nsw i32 %red, %l + store i32 %red.next, ptr %sum, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %ec = icmp eq i64 %iv.next, 10000 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +declare i32 @foo() + +; Loop with a call cannot be handled by LoopVectorize, introducing additional +; accumulators when unrolling increases throughput. 
+define i32 @test_add_with_call(i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_with_call( +; CHECK-SAME: i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[L:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[L_1:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[L_2:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[L_3:%.*]] = call i32 @foo() +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[BIN_RDX2:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[BIN_RDX2]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %l = call i32 @foo() + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} + +; Loop with backward dependence cannot be handled LoopVectorize, introducing additional +; accumulators when unrolling increases throughput. 
+define i32 @test_add_with_backward_dep(ptr %p, i64 %n, i32 %start) { +; CHECK-LABEL: define i32 @test_add_with_backward_dep( +; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 +; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1]], align 4 +; CHECK-NEXT: [[RDX_NEXT:%.*]] = add i32 [[RDX]], [[L]] +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2 +; CHECK-NEXT: [[GEP_11:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP_11]], align 4 +; CHECK-NEXT: [[GEP_1_1:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_1]], align 4 +; CHECK-NEXT: [[RDX_2:%.*]] = add i32 [[RDX_NEXT]], [[L_1]] +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3 +; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_2]], align 4 +; CHECK-NEXT: [[GEP_1_2:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_2]], align 4 +; CHECK-NEXT: [[RDX_NEXT_2:%.*]] = add i32 [[RDX_2]], [[L_2]] +; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4 +; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[L_3:%.*]] = load i32, ptr [[GEP_3]], align 4 +; CHECK-NEXT: [[GEP_1_3:%.*]] = getelementptr inbounds nuw i32, ptr [[P]], 
i64 [[IV_NEXT_3]] +; CHECK-NEXT: store i32 0, ptr [[GEP_1_3]], align 4 +; CHECK-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_NEXT_2]], [[L_3]] +; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000 +; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[BIN_RDX3:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[BIN_RDX3]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ %start, %entry ], [ %rdx.next, %loop ] + %iv.next = add i64 %iv, 1 + %gep = getelementptr inbounds nuw i32, ptr %p, i64 %iv + %l = load i32, ptr %gep + %gep.1 = getelementptr inbounds nuw i32, ptr %p, i64 %iv.next + store i32 0, ptr %gep.1 + %rdx.next = add i32 %rdx, %l + %ec = icmp ne i64 %iv.next, 1000 + br i1 %ec, label %loop, label %exit + +exit: + ret i32 %rdx.next +} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll new file mode 100644 index 0000000..8495dee --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 +; REQUIRES: asserts +; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \ +; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s + +; FIXME: Hoisted vector code should be costed with scalable cost. +; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost, +; and hence should not be produced by LoopVectorize. 
+ +; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00) +define void @cost_hoisted_vector_code(ptr %p, float %arg) { +; CHECK-LABEL: define void @cost_hoisted_vector_code( +; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[ARG]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]] +; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> [[BROADCAST_SPLAT]], <vscale x 4 x float> zeroinitializer) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]] +; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4 +; 
CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.0) + %gep.p.red = getelementptr float, ptr %p, i64 %iv + store float %res, ptr %gep.p.red, align 4 + %iv.next = add i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, 0 + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +declare float @llvm.minimumnum.f32(float, float) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll index 21266e5..162440a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/bf16.ll @@ -1,6 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFBFMIN -; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFBFMIN-PREDICATED +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFBFMIN ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 
-mattr=+v,+zvfbfmin -S | FileCheck %s -check-prefix=ZVFBFMIN define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { @@ -22,24 +22,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-ZVFBFMIN: [[EXIT]]: ; NO-ZVFBFMIN-NEXT: ret void ; -; NO-ZVFBFMIN-PREDICATED-LABEL: define void @fadd( -; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; NO-ZVFBFMIN-PREDICATED-NEXT: [[ENTRY:.*]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] -; NO-ZVFBFMIN-PREDICATED: [[LOOP]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd bfloat [[X]], [[Y]] -; NO-ZVFBFMIN-PREDICATED-NEXT: store bfloat [[Z]], ptr [[A_GEP]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] -; NO-ZVFBFMIN-PREDICATED: [[EXIT]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: ret void -; ; ZVFBFMIN-LABEL: define void @fadd( ; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; ZVFBFMIN-NEXT: [[ENTRY:.*]]: @@ -152,54 +134,6 @@ define void @vfwmaccbf16.vv(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 ; NO-ZVFBFMIN: [[EXIT]]: ; NO-ZVFBFMIN-NEXT: ret void ; -; NO-ZVFBFMIN-PREDICATED-LABEL: define void @vfwmaccbf16.vv( -; NO-ZVFBFMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { -; NO-ZVFBFMIN-PREDICATED-NEXT: 
[[ENTRY:.*]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; NO-ZVFBFMIN-PREDICATED: [[VECTOR_PH]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[VECTOR_BODY:.*]] -; NO-ZVFBFMIN-PREDICATED: [[VECTOR_BODY]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP:%.*]] = getelementptr float, ptr [[C]], i64 [[I]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = load <4 x bfloat>, ptr [[A_GEP]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = load <4 x bfloat>, ptr [[B_GEP]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[WIDE_MASKED_LOAD4:%.*]] = load <4 x float>, ptr [[C_GEP]], align 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP4:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD]] to <4 x float> -; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP5:%.*]] = fpext <4 x bfloat> [[WIDE_MASKED_LOAD3]] to <4 x float> -; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[WIDE_MASKED_LOAD4]]) -; NO-ZVFBFMIN-PREDICATED-NEXT: store <4 x float> [[TMP6]], ptr [[C_GEP]], align 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[I]], 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; NO-ZVFBFMIN-PREDICATED: [[MIDDLE_BLOCK]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] -; NO-ZVFBFMIN-PREDICATED: [[SCALAR_PH]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; NO-ZVFBFMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] -; NO-ZVFBFMIN-PREDICATED: [[LOOP]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: [[I1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[A_GEP1:%.*]] = getelementptr bfloat, ptr [[A]], i64 [[I1]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[B_GEP1:%.*]] = getelementptr bfloat, ptr [[B]], i64 [[I1]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[C_GEP1:%.*]] = getelementptr float, ptr [[C]], i64 [[I1]] -; NO-ZVFBFMIN-PREDICATED-NEXT: [[X:%.*]] = load bfloat, ptr [[A_GEP1]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y:%.*]] = load bfloat, ptr [[B_GEP1]], align 2 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[Z:%.*]] = load float, ptr [[C_GEP1]], align 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[X_EXT:%.*]] = fpext bfloat [[X]] to float -; NO-ZVFBFMIN-PREDICATED-NEXT: [[Y_EXT:%.*]] = fpext bfloat [[Y]] to float -; NO-ZVFBFMIN-PREDICATED-NEXT: [[FMULADD:%.*]] = call float @llvm.fmuladd.f32(float [[X_EXT]], float [[Y_EXT]], float [[Z]]) -; NO-ZVFBFMIN-PREDICATED-NEXT: store float [[FMULADD]], ptr [[C_GEP1]], align 4 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I1]], 1 -; NO-ZVFBFMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; NO-ZVFBFMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] -; NO-ZVFBFMIN-PREDICATED: [[EXIT]]: -; NO-ZVFBFMIN-PREDICATED-NEXT: ret void -; ; ZVFBFMIN-LABEL: define void @vfwmaccbf16.vv( ; ZVFBFMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { ; ZVFBFMIN-NEXT: [[ENTRY:.*]]: @@ -274,21 +208,3 @@ loop: exit: ret void } -;. 
-; NO-ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; NO-ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; NO-ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; NO-ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. -; NO-ZVFBFMIN-PREDICATED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; NO-ZVFBFMIN-PREDICATED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; NO-ZVFBFMIN-PREDICATED: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; NO-ZVFBFMIN-PREDICATED: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. -; ZVFBFMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; ZVFBFMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; ZVFBFMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; ZVFBFMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -; ZVFBFMIN: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} -; ZVFBFMIN: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} -;. 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll index 53e43e1..effaf57 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/f16.ll @@ -1,6 +1,6 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=NO-ZVFHMIN -; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFHMIN-PREDICATED +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue | FileCheck %s -check-prefix=NO-ZVFHMIN ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -S | FileCheck %s -check-prefix=ZVFHMIN define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { @@ -22,24 +22,6 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; NO-ZVFHMIN: [[EXIT]]: ; NO-ZVFHMIN-NEXT: ret void ; -; NO-ZVFHMIN-PREDICATED-LABEL: define void @fadd( -; NO-ZVFHMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; NO-ZVFHMIN-PREDICATED-NEXT: [[ENTRY:.*]]: -; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] -; NO-ZVFHMIN-PREDICATED: [[LOOP]]: -; NO-ZVFHMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] -; NO-ZVFHMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]] -; NO-ZVFHMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]] -; NO-ZVFHMIN-PREDICATED-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2 -; NO-ZVFHMIN-PREDICATED-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2 -; 
NO-ZVFHMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]] -; NO-ZVFHMIN-PREDICATED-NEXT: store half [[Z]], ptr [[A_GEP]], align 2 -; NO-ZVFHMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1 -; NO-ZVFHMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] -; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] -; NO-ZVFHMIN-PREDICATED: [[EXIT]]: -; NO-ZVFHMIN-PREDICATED-NEXT: ret void -; ; ZVFHMIN-LABEL: define void @fadd( ; ZVFHMIN-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { ; ZVFHMIN-NEXT: [[ENTRY:.*]]: @@ -86,6 +68,23 @@ define void @fadd(ptr noalias %a, ptr noalias %b, i64 %n) { ; ZVFHMIN: [[EXIT]]: ; ZVFHMIN-NEXT: ret void ; +; NO-ZVFHMIN-PREDICATED-LABEL: define void @fadd( +; NO-ZVFHMIN-PREDICATED-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; NO-ZVFHMIN-PREDICATED-NEXT: [[ENTRY:.*]]: +; NO-ZVFHMIN-PREDICATED-NEXT: br label %[[LOOP:.*]] +; NO-ZVFHMIN-PREDICATED: [[LOOP]]: +; NO-ZVFHMIN-PREDICATED-NEXT: [[I:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[LOOP]] ] +; NO-ZVFHMIN-PREDICATED-NEXT: [[A_GEP:%.*]] = getelementptr half, ptr [[A]], i64 [[I]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[B_GEP:%.*]] = getelementptr half, ptr [[B]], i64 [[I]] +; NO-ZVFHMIN-PREDICATED-NEXT: [[X:%.*]] = load half, ptr [[A_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[Y:%.*]] = load half, ptr [[B_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[Z:%.*]] = fadd half [[X]], [[Y]] +; NO-ZVFHMIN-PREDICATED-NEXT: store half [[Z]], ptr [[A_GEP]], align 2 +; NO-ZVFHMIN-PREDICATED-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; NO-ZVFHMIN-PREDICATED-NEXT: [[DONE:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; NO-ZVFHMIN-PREDICATED-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]] +; NO-ZVFHMIN-PREDICATED: [[EXIT]]: +; NO-ZVFHMIN-PREDICATED-NEXT: ret void entry: br label %loop loop: @@ -102,9 +101,3 @@ loop: exit: ret void } -;. 
-; ZVFHMIN: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} -; ZVFHMIN: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} -; ZVFHMIN: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} -; ZVFHMIN: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} -;. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll new file mode 100644 index 0000000..c5396f2 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-store-with-gap.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -mtriple=riscv64 -mattr=+v -passes=loop-vectorize \ +; RUN: -scalable-vectorization=off -enable-masked-interleaved-mem-accesses \ +; RUN: -force-vector-interleave=1 -riscv-v-vector-bits-min=1024 -S < %s | FileCheck %s + +define void @store_factor_2_with_tail_gap(i64 %n, ptr %a) { +; CHECK-LABEL: define void @store_factor_2_with_tail_gap( +; CHECK-SAME: i64 [[N:%.*]], ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP0]] +; 
CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VEC_IND]], <16 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> +; CHECK-NEXT: call void @llvm.masked.store.v32i64.p0(<32 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], i32 8, <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: store i64 [[INDVARS_IV]], ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %0 = shl nsw i64 %iv, 1 + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %0 + store i64 %iv, ptr %arrayidx, align 8 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll new file mode 100644 index 0000000..554ce7b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll @@ -0,0 +1,1481 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt < %s -p loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s + +; Reduction can be vectorized + +; ADD + +define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @add( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add 
nsw i32 [[TMP10]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 %add +} + +; OR + +define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @or( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] 
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = or <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[OR:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[OR]] = or i32 [[TMP10]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; 
CHECK-NEXT: ret i32 [[OR_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %or = or i32 %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 %or +} + +; AND + +define i32 @and(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @and( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> splat (i32 -1), i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = and <vscale x 4 x i32> 
[[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[AND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[AND]] = and i32 [[TMP10]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[AND_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %and = and i32 %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + 
%exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 %and +} + +; XOR + +define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @xor( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ insertelement (<vscale x 4 x i32> zeroinitializer, i32 2, i32 0), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = xor <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x 
i32> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[XOR:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[XOR]] = xor i32 [[TMP10]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[XOR_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %xor = xor i32 %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 %xor +} + +; SMIN + +define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @smin( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; 
CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: 
[[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[TMP11]], [[SUM_010]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp.i = icmp slt i32 %0, %sum.010 + %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret i32 %.sroa.speculated +} + +; UMAX + +define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @umax( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = 
call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ splat (i32 2), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt <vscale x 4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] 
], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp ugt i32 [[TMP11]], [[SUM_010]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], i32 [[TMP11]], i32 [[SUM_010]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi i32 [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %cmp.i = icmp ugt i32 %0, %sum.010 + %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret i32 %.sroa.speculated +} + +; FADD (FAST) + +define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { +; CHECK-LABEL: define float @fadd_fast( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 
[[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = fadd fast <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
%[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd fast float [[TMP10]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %add = fadd fast float %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %add +} + +define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfh" { +; CHECK-LABEL: define half @fadd_fast_half_zvfh( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 
[[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7]] = fadd fast <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> [[TMP7]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP10:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP10]], [[SUM_07]] +; 
CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %add = fadd fast half %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %add +} + +define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfhmin" { +; CHECK-LABEL: define half @fadd_fast_half_zvfhmin( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = fadd fast <16 x half> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = fadd fast <16 x half> [[WIDE_LOAD2]], [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x half> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd fast half [[TMP6]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi half [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], 
%[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %add = fadd fast half %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %add +} + +define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfbfmin" { +; CHECK-LABEL: define bfloat @fadd_fast_bfloat( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = fadd fast <16 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = fadd fast <16 x bfloat> [[WIDE_LOAD2]], 
[[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <16 x bfloat> [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = fadd fast bfloat [[TMP6]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi bfloat [ [[ADD]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret bfloat [[ADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ] + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load 
bfloat, ptr %arrayidx, align 4 + %add = fadd fast bfloat %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret bfloat %add +} + +; FMIN (FAST) + +define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { +; CHECK-LABEL: define float @fmin_fast( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label 
%[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt float [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp.i = fcmp olt float %0, %sum.07 + %.sroa.speculated = select i1 
%cmp.i, float %0, float %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %.sroa.speculated +} + +define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 { +; CHECK-LABEL: define half @fmin_fast_half_zvfhmin( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt half [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %cmp.i = fcmp olt half %0, %sum.07 + %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07 + %iv.next = add 
nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %.sroa.speculated +} + +define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 { +; CHECK-LABEL: define bfloat @fmin_fast_bfloat_zvfbfmin( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP26:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt bfloat [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load bfloat, ptr %arrayidx, align 4 + %cmp.i = fcmp olt bfloat %0, %sum.07 + %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07 + %iv.next 
= add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret bfloat %.sroa.speculated +} + +; FMAX (FAST) + +define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { +; CHECK-LABEL: define float @fmax_fast( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP28:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt float [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], float [[TMP11]], float [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi float [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %cmp.i = fcmp fast ogt float %0, %sum.07 + %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 + 
%iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %.sroa.speculated +} + +define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 { +; CHECK-LABEL: define half @fmax_fast_half_zvfhmin( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP30:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt half [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], half [[TMP11]], half [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi half [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %cmp.i = fcmp fast ogt half %0, %sum.07 + %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07 + %iv.next = add nuw nsw i64 
%iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %.sroa.speculated +} + +define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 { +; CHECK-LABEL: define bfloat @fmax_fast_bfloat_zvfbfmin( +; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x bfloat> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x bfloat>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP8]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop 
[[LOOP32:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[DOTSROA_SPECULATED:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast ogt bfloat [[TMP11]], [[SUM_07]] +; CHECK-NEXT: [[DOTSROA_SPECULATED]] = select i1 [[CMP_I]], bfloat [[TMP11]], bfloat [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[DOTSROA_SPECULATED_LCSSA:%.*]] = phi bfloat [ [[DOTSROA_SPECULATED]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret bfloat [[DOTSROA_SPECULATED_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load bfloat, ptr %arrayidx, align 4 + %cmp.i = fcmp fast ogt bfloat %0, %sum.07 + %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat 
%sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret bfloat %.sroa.speculated +} + +; Reduction cannot be vectorized + +; MUL + +define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @mul( +; CHECK-SAME: ptr captures(none) [[A:%.*]], ptr readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <8 x i32> [ splat (i32 1), %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = mul <8 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = mul <8 x i32> [[WIDE_LOAD2]], [[VEC_PHI1]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i32> [[TMP3]], 
[[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MUL:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP6]], [[SUM_07]] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP35:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[MUL_LCSSA]] +; +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %mul = mul nsw i32 %0, %sum.07 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret i32 %mul +} + +; Note: This test was added to ensure we always check the legality of reductions before checking for memory dependencies +define i32 
@memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { +; CHECK-LABEL: define i32 @memory_dependence( +; CHECK-SAME: ptr noalias captures(none) [[A:%.*]], ptr noalias readonly captures(none) [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ <i32 2, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP5]] = mul <8 x i32> [[WIDE_LOAD1]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; 
CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[MUL:%.*]], %[[FOR_BODY]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[ADD2:%.*]] = add nuw nsw i64 [[I]], 32 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[ADD2]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP9]], [[SUM]] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MUL_LCSSA:%.*]] = phi i32 [ [[MUL]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[MUL_LCSSA]] +; +entry: + br label %for.body + +for.body: + %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] + %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i + %1 = load i32, ptr %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + %add2 = add nuw nsw i64 %i, 32 + %arrayidx3 = 
getelementptr inbounds i32, ptr %a, i64 %add2 + store i32 %add, ptr %arrayidx3, align 4 + %mul = mul nsw i32 %1, %sum + %inc = add nuw nsw i64 %i, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret i32 %mul +} + +define float @fmuladd(ptr %a, ptr %b, i64 %n) { +; CHECK-LABEL: define float @fmuladd( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x float> [ insertelement (<vscale x 4 x float> splat (float -0.000000e+00), float 0.000000e+00, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD1]], <vscale x 4 x float> 
[[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MULADD]] = tail call reassoc float @llvm.fmuladd.f32(float [[TMP11]], float [[TMP12]], float [[SUM_07]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi float [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret float [[MULADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry 
], [ %iv.next, %for.body ] + %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv + %0 = load float, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv + %1 = load float, ptr %arrayidx2, align 4 + %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret float %muladd +} + +define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" { +; CHECK-LABEL: define half @fmuladd_f16_zvfh( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x half> [ insertelement (<vscale x 8 x half> splat (half 0xH8000), half 0xH0000, i32 0), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds half, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 8 x half>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD1]], <vscale x 8 x half> [[VEC_PHI]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, <vscale x 8 x half> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP11:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load half, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP11]], half [[TMP12]], half [[SUM_07]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label 
%[[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP10]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[MULADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv + %1 = load half, ptr %arrayidx2, align 4 + %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %muladd +} + + +; We can't scalably vectorize reductions of f16 with zvfhmin or bf16 with zvfbfmin, so make sure we use fixed-length vectors instead. + +define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvfhmin" { +; CHECK-LABEL: define half @fmuladd_f16_zvfhmin( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x half> [ <half 0xH0000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %[[VECTOR_PH]] ], [ 
[[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x half> [ splat (half 0xH8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds half, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x half>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x half>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds half, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x half>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x half>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], <16 x half> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP5]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], <16 x half> [[VEC_PHI1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi half [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0xH0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; 
CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi half [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load half, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP9:%.*]] = load half, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MULADD]] = tail call reassoc half @llvm.fmuladd.f16(half [[TMP8]], half [[TMP9]], half [[SUM_07]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi half [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret half [[MULADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ] + %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv + %0 = load half, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv + %1 = load half, ptr %arrayidx2, align 4 + %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret half %muladd +} + +define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin" { +; CHECK-LABEL: define bfloat @fmuladd_bf16( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i64 [[N:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 32 +; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 32 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x bfloat> [ <bfloat 0xR0000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000, bfloat 0xR8000>, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <16 x bfloat> [ splat (bfloat 0xR8000), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds bfloat, ptr [[TMP0]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x bfloat>, ptr [[TMP0]], align 4 +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x bfloat>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds bfloat, ptr [[TMP2]], i32 16 +; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x bfloat>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x bfloat>, ptr [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], <16 x bfloat> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP5]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], <16 x bfloat> [[VEC_PHI1]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 
[[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[TMP5]], [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi bfloat [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0xR0000, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[SUM_07:%.*]] = phi bfloat [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MULADD:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[TMP8:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[B]], i64 [[IV]] +; CHECK-NEXT: [[TMP9:%.*]] = load bfloat, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[MULADD]] = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat [[TMP8]], bfloat [[TMP9]], bfloat [[SUM_07]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[MULADD_LCSSA:%.*]] = phi bfloat [ [[MULADD]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret bfloat [[MULADD_LCSSA]] +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %muladd, 
%for.body ] + %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv + %0 = load bfloat, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds bfloat, ptr %b, i64 %iv + %1 = load bfloat, ptr %arrayidx2, align 4 + %muladd = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat %0, bfloat %1, bfloat %sum.07) + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret bfloat %muladd +} + +declare float @llvm.fmuladd.f32(float, float, float) + +attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } +attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin"} +attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin"} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll deleted file mode 100644 index 695a0c3..0000000 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll +++ /dev/null @@ -1,729 +0,0 @@ -; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on \ -; RUN: -riscv-v-vector-bits-max=128 \ -; RUN: -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \ -; RUN: -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \ -; RUN: -force-target-max-vector-interleave=2 -mattr=+v,+f -S 2>%t \ -; RUN: | FileCheck %s -check-prefix=CHECK -; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK - -; Reduction can be vectorized - -; ADD - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @add(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @add -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[ADD1:.*]] = add <vscale x 8 x i32> %[[LOAD1]] -; CHECK: 
%[[ADD2:.*]] = add <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[ADD:.*]] = add <vscale x 8 x i32> %[[ADD2]], %[[ADD1]] -; CHECK-NEXT: call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> %[[ADD]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi i32 [ 2, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %add = add nsw i32 %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.body, %entry - ret i32 %add -} - -; OR - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @or(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @or -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[OR1:.*]] = or <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[OR2:.*]] = or <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[OR:.*]] = or <vscale x 8 x i32> %[[OR2]], %[[OR1]] -; CHECK-NEXT: call i32 @llvm.vector.reduce.or.nxv8i32(<vscale x 8 x i32> %[[OR]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi i32 [ 2, %entry ], [ %or, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %or = or i32 %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.body, %entry - ret i32 %or -} - -; AND - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @and(ptr nocapture %a, ptr 
nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @and -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[AND1:.*]] = and <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[AND2:.*]] = and <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[ABD:.*]] = and <vscale x 8 x i32> %[[ADD2]], %[[AND1]] -; CHECK-NEXT: call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %[[ADD]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi i32 [ 2, %entry ], [ %and, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %and = and i32 %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.body, %entry - ret i32 %and -} - -; XOR - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define i32 @xor(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @xor -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[XOR1:.*]] = xor <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[XOR2:.*]] = xor <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[XOR:.*]] = xor <vscale x 8 x i32> %[[XOR2]], %[[XOR1]] -; CHECK-NEXT: call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %[[XOR]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi i32 [ 2, %entry ], [ %xor, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %xor = xor i32 %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 
%exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.body, %entry - ret i32 %xor -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -; SMIN - -define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @smin -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[ICMP1:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[ICMP2:.*]] = icmp slt <vscale x 8 x i32> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]) -; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[RDX]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %cmp.i = icmp slt i32 %0, %sum.010 - %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret i32 %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -; UMAX - -define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @umax -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x i32> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x i32> -; CHECK: %[[ICMP1:.*]] = icmp ugt <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[ICMP2:.*]] = icmp ugt <vscale 
x 8 x i32> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]) -; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[RDX]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.010 = phi i32 [ 2, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %cmp.i = icmp ugt i32 %0, %sum.010 - %.sroa.speculated = select i1 %cmp.i, i32 %0, i32 %sum.010 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret i32 %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -; FADD (FAST) - -define float @fadd_fast(ptr noalias nocapture readonly %a, i64 %n) { -; CHECK-LABEL: @fadd_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float> -; CHECK: %[[ADD1:.*]] = fadd fast <vscale x 8 x float> %[[LOAD1]] -; CHECK: %[[ADD2:.*]] = fadd fast <vscale x 8 x float> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[ADD:.*]] = fadd fast <vscale x 8 x float> %[[ADD2]], %[[ADD1]] -; CHECK-NEXT: call fast float @llvm.vector.reduce.fadd.nxv8f32(float 0.000000e+00, <vscale x 8 x float> %[[ADD]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv - %0 = load float, ptr %arrayidx, align 4 - %add = fadd fast float 
%0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret float %add -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define half @fadd_fast_half_zvfh(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfh" { -; CHECK-LABEL: @fadd_fast_half_zvfh -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half> -; CHECK: %[[FADD1:.*]] = fadd fast <vscale x 8 x half> %[[LOAD1]] -; CHECK: %[[FADD2:.*]] = fadd fast <vscale x 8 x half> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = fadd fast <vscale x 8 x half> %[[FADD2]], %[[FADD1]] -; CHECK: call fast half @llvm.vector.reduce.fadd.nxv8f16(half 0xH0000, <vscale x 8 x half> %[[RDX]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %add = fadd fast half %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret half %add -} - -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) -define half @fadd_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfhmin" { -; CHECK-LABEL: @fadd_fast_half_zvfhmin -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <16 x half> -; CHECK: %[[LOAD2:.*]] = load <16 x half> -; CHECK: %[[FADD1:.*]] = fadd fast <16 x half> %[[LOAD1]] -; CHECK: %[[FADD2:.*]] = fadd fast <16 x half> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = fadd fast <16 x half> %[[FADD2]], %[[FADD1]] -; CHECK: call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> %[[RDX]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %add = fadd fast half %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret half %add -} - -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) -define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) "target-features"="+zvfbfmin" { -; CHECK-LABEL: @fadd_fast_bfloat -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <16 x bfloat> -; CHECK: %[[LOAD2:.*]] = load <16 x bfloat> -; CHECK: %[[FADD1:.*]] = fadd fast <16 x bfloat> %[[LOAD1]] -; CHECK: %[[FADD2:.*]] = fadd fast <16 x bfloat> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = fadd fast <16 x bfloat> %[[FADD2]], %[[FADD1]] -; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR0000, <16 x bfloat> %[[RDX]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %add, %for.body ] - %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv - %0 = load bfloat, ptr %arrayidx, align 4 - %add = fadd fast bfloat %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret bfloat %add -} - -; FMIN (FAST) - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { -; CHECK-LABEL: @fmin_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float> -; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x float> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], 
<vscale x 8 x float> %[[SEL2]] -; CHECK-NEXT: call float @llvm.vector.reduce.fmin.nxv8f32(<vscale x 8 x float> %[[SEL]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv - %0 = load float, ptr %arrayidx, align 4 - %cmp.i = fcmp olt float %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret float %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 { -; CHECK-LABEL: @fmin_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half> -; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x half> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x half> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x half> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]] -; CHECK-NEXT: call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> %[[SEL]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %cmp.i = fcmp olt half %0, %sum.07 - %.sroa.speculated = select 
i1 %cmp.i, half %0, half %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret half %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 { -; CHECK-LABEL: @fmin_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat> -; CHECK: %[[FCMP1:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp olt <vscale x 8 x bfloat> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp olt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]] -; CHECK-NEXT: call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv - %0 = load bfloat, ptr %arrayidx, align 4 - %cmp.i = fcmp olt bfloat %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret bfloat %.sroa.speculated -} - -; FMAX (FAST) - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 { -; CHECK-LABEL: 
@fmax_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float> -; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x float> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]] -; CHECK-NEXT: call fast float @llvm.vector.reduce.fmax.nxv8f32(<vscale x 8 x float> %[[SEL]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv - %0 = load float, ptr %arrayidx, align 4 - %cmp.i = fcmp fast ogt float %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret float %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 { -; CHECK-LABEL: @fmax_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x half> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x half> -; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x half> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x half> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> 
%[[FCMP2]], <vscale x 8 x half> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x half> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x half> %[[SEL1]], <vscale x 8 x half> %[[SEL2]] -; CHECK-NEXT: call fast half @llvm.vector.reduce.fmax.nxv8f16(<vscale x 8 x half> %[[SEL]]) -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %cmp.i = fcmp fast ogt half %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret half %.sroa.speculated -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 { -; CHECK-LABEL: @fmax_fast -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x bfloat> -; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x bfloat> -; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD1]] -; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[LOAD2]] -; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x bfloat> %[[LOAD1]] -; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x bfloat> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x bfloat> %[[SEL1]], %[[SEL2]] -; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x bfloat> %[[SEL1]], <vscale x 8 x bfloat> %[[SEL2]] -; CHECK-NEXT: call fast bfloat @llvm.vector.reduce.fmax.nxv8bf16(<vscale x 8 x bfloat> %[[SEL]]) -entry: - br label %for.body - -for.body: 
- %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ] - %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv - %0 = load bfloat, ptr %arrayidx, align 4 - %cmp.i = fcmp fast ogt bfloat %0, %sum.07 - %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret bfloat %.sroa.speculated -} - -; Reduction cannot be vectorized - -; MUL - -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. -; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) -define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @mul -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <8 x i32> -; CHECK: %[[LOAD2:.*]] = load <8 x i32> -; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD1]] -; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD2]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi i32 [ 2, %entry ], [ %mul, %for.body ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv - %0 = load i32, ptr %arrayidx, align 4 - %mul = mul nsw i32 %0, %sum.07 - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: ; preds = %for.body, %entry - ret i32 %mul -} - -; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this 
loop. -; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) -define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { -; CHECK-LABEL: @memory_dependence -; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <8 x i32> -; CHECK: %[[LOAD2:.*]] = load <8 x i32> -; CHECK: %[[LOAD3:.*]] = load <8 x i32> -; CHECK: %[[LOAD4:.*]] = load <8 x i32> -; CHECK: %[[ADD1:.*]] = add nsw <8 x i32> %[[LOAD3]], %[[LOAD1]] -; CHECK: %[[ADD2:.*]] = add nsw <8 x i32> %[[LOAD4]], %[[LOAD2]] -; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD3]] -; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD4]] -; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) -entry: - br label %for.body - -for.body: - %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] - %sum = phi i32 [ %mul, %for.body ], [ 2, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %a, i64 %i - %0 = load i32, ptr %arrayidx, align 4 - %arrayidx1 = getelementptr inbounds i32, ptr %b, i64 %i - %1 = load i32, ptr %arrayidx1, align 4 - %add = add nsw i32 %1, %0 - %add2 = add nuw nsw i64 %i, 32 - %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %add2 - store i32 %add, ptr %arrayidx3, align 4 - %mul = mul nsw i32 %1, %sum - %inc = add nuw nsw i64 %i, 1 - %exitcond.not = icmp eq i64 %inc, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 - -for.end: - ret i32 %mul -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) -define float @fmuladd(ptr %a, ptr %b, i64 %n) { -; CHECK-LABEL: @fmuladd( -; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float> -; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float> -; CHECK: [[WIDE_LOAD3:%.*]] = load <vscale x 4 x float> -; CHECK: [[WIDE_LOAD4:%.*]] = load <vscale x 4 x float> -; CHECK: [[MULADD1:%.*]] = call reassoc <vscale x 4 x float> 
@llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc <vscale x 4 x float> @llvm.fmuladd.nxv4f32(<vscale x 4 x float> [[WIDE_LOAD2]], <vscale x 4 x float> [[WIDE_LOAD4]], -; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <vscale x 4 x float> [[MULADD2]], [[MULADD1]] -; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[BIN_RDX]]) -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi float [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv - %0 = load float, ptr %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %iv - %1 = load float, ptr %arrayidx2, align 4 - %muladd = tail call reassoc float @llvm.fmuladd.f32(float %0, float %1, float %sum.07) - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 - -for.end: - ret float %muladd -} - -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2) -define half @fmuladd_f16_zvfh(ptr %a, ptr %b, i64 %n) "target-features"="+zvfh" { -; CHECK-LABEL: @fmuladd_f16_zvfh( -; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load <vscale x 8 x half> -; CHECK: [[WIDE_LOAD2:%.*]] = load <vscale x 8 x half> -; CHECK: [[WIDE_LOAD3:%.*]] = load <vscale x 8 x half> -; CHECK: [[WIDE_LOAD4:%.*]] = load <vscale x 8 x half> -; CHECK: [[MULADD1:%.*]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc <vscale x 8 x half> @llvm.fmuladd.nxv8f16(<vscale x 8 x half> [[WIDE_LOAD2]], <vscale x 8 x half> [[WIDE_LOAD4]], -; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <vscale x 8 x half> [[MULADD2]], 
[[MULADD1]] -; CHECK: call reassoc half @llvm.vector.reduce.fadd.nxv8f16(half 0xH8000, <vscale x 8 x half> [[BIN_RDX]]) -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv - %1 = load half, ptr %arrayidx2, align 4 - %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07) - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 - -for.end: - ret half %muladd -} - - -; We can't scalably vectorize reductions of f16 with zvfhmin or bf16 with zvfbfmin, so make sure we use fixed-length vectors instead. - -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) -define half @fmuladd_f16_zvfhmin(ptr %a, ptr %b, i64 %n) "target-features"="+zvfhmin" { -; CHECK-LABEL: @fmuladd_f16_zvfhmin( -; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load <16 x half> -; CHECK: [[WIDE_LOAD2:%.*]] = load <16 x half> -; CHECK: [[WIDE_LOAD3:%.*]] = load <16 x half> -; CHECK: [[WIDE_LOAD4:%.*]] = load <16 x half> -; CHECK: [[MULADD1:%.*]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD]], <16 x half> [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc <16 x half> @llvm.fmuladd.v16f16(<16 x half> [[WIDE_LOAD2]], <16 x half> [[WIDE_LOAD4]], -; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <16 x half> [[MULADD2]], [[MULADD1]] -; CHECK: call reassoc half @llvm.vector.reduce.fadd.v16f16(half 0xH8000, <16 x half> [[BIN_RDX]]) -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi half [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds half, ptr %a, i64 %iv - %0 = load half, ptr %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds half, ptr %b, i64 %iv - %1 = load half, ptr %arrayidx2, align 4 - %muladd = tail call reassoc half @llvm.fmuladd.f16(half %0, half %1, half %sum.07) - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 - -for.end: - ret half %muladd -} - -; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) -define bfloat @fmuladd_bf16(ptr %a, ptr %b, i64 %n) "target-features"="+zvfbfmin" { -; CHECK-LABEL: @fmuladd_bf16( -; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load <16 x bfloat> -; CHECK: [[WIDE_LOAD2:%.*]] = load <16 x bfloat> -; CHECK: [[WIDE_LOAD3:%.*]] = load <16 x bfloat> -; CHECK: [[WIDE_LOAD4:%.*]] = load <16 x bfloat> -; CHECK: [[MULADD1:%.*]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD]], <16 x bfloat> [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc <16 x bfloat> @llvm.fmuladd.v16bf16(<16 x bfloat> [[WIDE_LOAD2]], <16 x bfloat> [[WIDE_LOAD4]], -; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc <16 x bfloat> [[MULADD2]], [[MULADD1]] -; CHECK: call reassoc bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> [[BIN_RDX]]) -; -entry: - br label %for.body - -for.body: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] - %sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %muladd, %for.body ] - %arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv - %0 = load bfloat, ptr %arrayidx, align 4 - %arrayidx2 = getelementptr inbounds bfloat, ptr %b, i64 %iv - %1 = load bfloat, ptr %arrayidx2, align 4 - %muladd = tail call reassoc bfloat @llvm.fmuladd.bf16(bfloat %0, bfloat %1, bfloat %sum.07) - %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, %n - br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !1 - -for.end: - ret bfloat %muladd -} - -declare float @llvm.fmuladd.f32(float, float, float) - -attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" } -attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin"} -attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin"} - -!0 = distinct !{!0, !1, !2, !3, !4} -!1 = 
!{!"llvm.loop.vectorize.width", i32 8} -!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} -!3 = !{!"llvm.loop.interleave.count", i32 2} -!4 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 745b8ba..5c6febc 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ -1,60 +1,57 @@ -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \ -; RUN: < %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \ -; RUN: -scalable-vectorization=on -S < %s | FileCheck %s -check-prefix=SCALABLE +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s -target triple = "riscv64" - -define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_icmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sge <4 x 
i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @select_icmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sge <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 
[[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) { +; CHECK-LABEL: define i32 @select_icmp( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr readonly captures(none) [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp sge <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP12]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop 
[[LOOP3:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; entry: br label %for.body @@ -74,56 +71,57 @@ for.end: ret i32 %cond } -define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_fcmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast uge <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @select_fcmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: 
[[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast uge <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) { +; CHECK-LABEL: define i32 @select_fcmp( +; CHECK-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr readonly captures(none) [[C:%.*]], i64 
[[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast uge <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]]) +; CHECK-NEXT: 
[[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[COND:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP12]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[FOR_END]]: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; entry: br label %for.body @@ -143,52 +141,55 @@ for.end: ret i32 %cond } -define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_icmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; 
CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3) -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @select_const_i32_from_icmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]] -; 
SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { +; CHECK-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) +; 
CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 7, i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 3, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 7 +; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -208,52 +209,55 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_i32_from_icmp(ptr nocapture 
readonly %v, i32 %a, i32 %b, i64 %n) #0 { -; CHECK-LABEL: @select_i32_from_icmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 3) -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @select_i32_from_icmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], 
[ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) { +; CHECK-LABEL: define i32 @select_i32_from_icmp( +; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], 
%[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 3) +; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 [[B]], i32 [[A]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 3 +; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 [[B]] +; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 
[[TMP18]], [[N]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -273,52 +277,55 @@ exit: ; preds = %for.body ret i32 %5 } -define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_fcmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast one <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @select_const_i32_from_fcmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; 
SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP10]], 4 -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast one <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP8]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) { +; CHECK-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 
@llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast one <vscale x 4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00) +; CHECK-NEXT: [[TMP8]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP7]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP10]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP11]], i32 1, i32 2 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 2, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP12:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[TMP18:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: 
[[TMP13:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[TMP17:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ueq float [[TMP15]], 3.000000e+00 +; CHECK-NEXT: [[TMP17]] = select i1 [[TMP16]], i32 [[TMP13]], i32 1 +; CHECK-NEXT: [[TMP18]] = add nuw nsw i64 [[TMP12]], 1 +; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[TMP18]], [[N]] +; CHECK-NEXT: br i1 [[TMP19]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -338,12 +345,24 @@ exit: ; preds = %for.body ret i32 %5 } -define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_f32_from_icmp -; CHECK-NOT: vector.body -; -; SCALABLE-LABEL: @select_const_f32_from_icmp -; SCALABLE-NOT: vector.body +define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) { +; CHECK-LABEL: define float @select_const_f32_from_icmp( +; CHECK-SAME: ptr readonly captures(none) [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[TMP6:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, %[[ENTRY]] ], [ [[TMP5:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00 +; CHECK-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]] 
+; CHECK-NEXT: br i1 [[TMP7]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], %[[FOR_BODY]] ] +; CHECK-NEXT: ret float [[DOTLCSSA]] ; entry: br label %for.body @@ -363,60 +382,67 @@ exit: ; preds = %for.body ret float %5 } -define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { -; CHECK-LABEL: @pred_select_const_i32_from_icmp -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 35) -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP5]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) -; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) { +; CHECK-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-SAME: ptr noalias readonly captures(none) [[SRC1:%.*]], ptr noalias readonly captures(none) [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; 
CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP8]], i32 4, <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i32> poison) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) +; CHECK-NEXT: [[TMP10:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP9]] +; CHECK-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x i1> [[TMP10]], <vscale x 4 x i1> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP12:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 -; CHECK-NEXT: %cmp.n = icmp eq i64 %n, %n.vec -; -; SCALABLE-LABEL: @pred_select_const_i32_from_icmp -; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4 -; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] -; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP5]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 35) -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[INDEX]] -; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison) -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 2) -; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]] -; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] -; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 -; SCALABLE-NEXT: %cmp.n = icmp eq i64 %n, %n.vec +; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]]) +; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_INC:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %[[FOR_INC]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP14]], 35 +; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_INC]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP15]], 2 +; CHECK-NEXT: 
[[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] +; CHECK-NEXT: br label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[R_1]] = phi i32 [ [[R_012]], %[[FOR_BODY]] ], [ [[SPEC_SELECT]], %[[IF_THEN]] ] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[FOR_END_LOOPEXIT]]: +; CHECK-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %[[FOR_INC]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[R_1_LCSSA]] ; entry: br label %for.body @@ -446,5 +472,3 @@ for.end.loopexit: ; preds = %for.inc %r.1.lcssa = phi i32 [ %r.1, %for.inc ] ret i32 %r.1.lcssa } - -attributes #0 = { "target-features"="+f,+v" } diff --git a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll index 7c74981..90d261b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/epilog-vectorization-inductions.ll @@ -250,3 +250,78 @@ loop: exit: ret void } + +; Test case for https://github.com/llvm/llvm-project/issues/151686. 
+define i8 @multiple_inductions_start_at_0() { +; CHECK-LABEL: @multiple_inductions_start_at_0( +; CHECK-NEXT: iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <32 x i8> [[VEC_IND]], zeroinitializer +; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <32 x i8> [[STEP_ADD]], zeroinitializer +; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <32 x i8> [[STEP_ADD_2]], zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 128 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i8> [[STEP_ADD_3]], zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <32 x i8> [[STEP_ADD_3]], i32 31 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[BC_RESUME_VAL]] to i8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = 
shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT3:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i8> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT4:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i32 [[INDEX1]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT4]] = add <4 x i8> [[VEC_IND2]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT3]], 1052 +; CHECK-NEXT: br i1 [[TMP3]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i8> [[VEC_IND2]], i32 3 +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi i32 [ 1052, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 1024, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i32 [ -469762048, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL5]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL6]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ADD]] = add i32 [[IV_2]], -16777216 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[IV_2]] to i8 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp ugt i32 [[IV]], 1050 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: exit: +; 
CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[TRUNC]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ], [ [[TMP4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i8 [[RES]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.2 = phi i32 [ 0, %entry ], [ %add, %loop ] + %add = add i32 %iv.2, -16777216 + %trunc = trunc i32 %iv.2 to i8 + %iv.next = add i32 %iv, 1 + %ec = icmp ugt i32 %iv, 1050 + br i1 %ec, label %exit, label %loop + +exit: + %res = phi i8 [ %trunc, %loop ] + ret i8 %res +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index e89f41b..97b5210 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -142,40 +142,40 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; AUTO_VEC: for.body.preheader.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640 -; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 4 -; AUTO_VEC-NEXT: [[INVARIANT_GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8 -; AUTO_VEC-NEXT: [[INVARIANT_GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12 -; AUTO_VEC-NEXT: [[INVARIANT_GEP5:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16 -; AUTO_VEC-NEXT: [[INVARIANT_GEP7:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 20 -; AUTO_VEC-NEXT: [[INVARIANT_GEP9:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 24 -; AUTO_VEC-NEXT: [[INVARIANT_GEP11:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 28 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[X_06:%.*]] = phi float [ 1.000000e+00, 
[[FOR_BODY_PREHEADER_NEW]] ], [ [[CONV1_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; AUTO_VEC-NEXT: store float [[X_06]], ptr [[ARRAYIDX]], align 4 ; AUTO_VEC-NEXT: [[CONV1:%.*]] = fadd float [[X_06]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 4 ; AUTO_VEC-NEXT: store float [[CONV1]], ptr [[ARRAYIDX_1]], align 4 ; AUTO_VEC-NEXT: [[CONV1_1:%.*]] = fadd float [[CONV1]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP1]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP2]], i64 8 ; AUTO_VEC-NEXT: store float [[CONV1_1]], ptr [[ARRAYIDX_2]], align 4 ; AUTO_VEC-NEXT: [[CONV1_2:%.*]] = fadd float [[CONV1_1]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP3]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP3]], i64 12 ; AUTO_VEC-NEXT: store float [[CONV1_2]], ptr [[ARRAYIDX_3]], align 4 ; AUTO_VEC-NEXT: [[CONV1_3:%.*]] = fadd float [[CONV1_2]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP5]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr 
inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 16 ; AUTO_VEC-NEXT: store float [[CONV1_3]], ptr [[ARRAYIDX_4]], align 4 ; AUTO_VEC-NEXT: [[CONV1_4:%.*]] = fadd float [[CONV1_3]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP7]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 20 ; AUTO_VEC-NEXT: store float [[CONV1_4]], ptr [[ARRAYIDX_5]], align 4 ; AUTO_VEC-NEXT: [[CONV1_5:%.*]] = fadd float [[CONV1_4]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP9]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP6]], i64 24 ; AUTO_VEC-NEXT: store float [[CONV1_5]], ptr [[ARRAYIDX_6]], align 4 ; AUTO_VEC-NEXT: [[CONV1_6:%.*]] = fadd float [[CONV1_5]], 5.000000e-01 -; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds float, ptr [[INVARIANT_GEP11]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]] +; AUTO_VEC-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 28 ; AUTO_VEC-NEXT: store float [[CONV1_6]], ptr [[ARRAYIDX_7]], align 4 ; AUTO_VEC-NEXT: [[CONV1_7]] = fadd float [[CONV1_6]], 5.000000e-01 ; AUTO_VEC-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 @@ -299,40 +299,40 @@ define double @external_use_without_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] ; AUTO_VEC: entry.new: ; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[SMAX]], 9223372036854775800 -; AUTO_VEC-NEXT: [[INVARIANT_GEP:%.*]] = 
getelementptr i8, ptr [[A:%.*]], i64 8 -; AUTO_VEC-NEXT: [[INVARIANT_GEP2:%.*]] = getelementptr i8, ptr [[A]], i64 16 -; AUTO_VEC-NEXT: [[INVARIANT_GEP4:%.*]] = getelementptr i8, ptr [[A]], i64 24 -; AUTO_VEC-NEXT: [[INVARIANT_GEP6:%.*]] = getelementptr i8, ptr [[A]], i64 32 -; AUTO_VEC-NEXT: [[INVARIANT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 40 -; AUTO_VEC-NEXT: [[INVARIANT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 48 -; AUTO_VEC-NEXT: [[INVARIANT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 56 ; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]] ; AUTO_VEC: for.body: ; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[I_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[J:%.*]] = phi double [ 0.000000e+00, [[ENTRY_NEW]] ], [ [[J_NEXT_7:%.*]], [[FOR_BODY]] ] ; AUTO_VEC-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY]] ] -; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[I]] ; AUTO_VEC-NEXT: store double [[J]], ptr [[T0]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT:%.*]] = fadd double [[J]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr double, ptr [[INVARIANT_GEP]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_1:%.*]] = getelementptr i8, ptr [[TMP1]], i64 8 ; AUTO_VEC-NEXT: store double [[J_NEXT]], ptr [[T0_1]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_1:%.*]] = fadd double [[J_NEXT]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr double, ptr [[INVARIANT_GEP2]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_2:%.*]] = getelementptr i8, ptr [[TMP2]], i64 16 ; AUTO_VEC-NEXT: store double [[J_NEXT_1]], ptr [[T0_2]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_2:%.*]] = fadd double [[J_NEXT_1]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr double, ptr [[INVARIANT_GEP4]], i64 [[I]] 
+; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_3:%.*]] = getelementptr i8, ptr [[TMP3]], i64 24 ; AUTO_VEC-NEXT: store double [[J_NEXT_2]], ptr [[T0_3]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_3:%.*]] = fadd double [[J_NEXT_2]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr double, ptr [[INVARIANT_GEP6]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_4:%.*]] = getelementptr i8, ptr [[TMP4]], i64 32 ; AUTO_VEC-NEXT: store double [[J_NEXT_3]], ptr [[T0_4]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_4:%.*]] = fadd double [[J_NEXT_3]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr double, ptr [[INVARIANT_GEP8]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_5:%.*]] = getelementptr i8, ptr [[TMP5]], i64 40 ; AUTO_VEC-NEXT: store double [[J_NEXT_4]], ptr [[T0_5]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_5:%.*]] = fadd double [[J_NEXT_4]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr double, ptr [[INVARIANT_GEP10]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP6:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_6:%.*]] = getelementptr i8, ptr [[TMP6]], i64 48 ; AUTO_VEC-NEXT: store double [[J_NEXT_5]], ptr [[T0_6]], align 8 ; AUTO_VEC-NEXT: [[J_NEXT_6:%.*]] = fadd double [[J_NEXT_5]], 3.000000e+00 -; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr double, ptr [[INVARIANT_GEP12]], i64 [[I]] +; AUTO_VEC-NEXT: [[TMP7:%.*]] = getelementptr double, ptr [[A]], i64 [[I]] +; AUTO_VEC-NEXT: [[T0_7:%.*]] = getelementptr i8, ptr [[TMP7]], i64 56 ; AUTO_VEC-NEXT: store double [[J_NEXT_6]], ptr [[T0_7]], align 8 ; AUTO_VEC-NEXT: [[I_NEXT_7]] = add nuw nsw i64 [[I]], 8 ; AUTO_VEC-NEXT: [[J_NEXT_7]] = fadd double [[J_NEXT_6]], 3.000000e+00 diff --git a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll 
b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll index e79995f..f329a18f 100644 --- a/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll +++ b/llvm/test/Transforms/LoopVectorize/single-early-exit-deref-assumptions.ll @@ -7,21 +7,46 @@ define i64 @early_exit_alignment_and_deref_known_via_assumption_with_constant_si ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P1]], i64 4), "dereferenceable"(ptr [[P1]], i64 1024) ] ; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[P2]], i64 4), "dereferenceable"(ptr [[P2]], i64 1024) ] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: ; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX1]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX1]] +; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] +; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]]) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 1024 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.split: +; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[LOOP_END:%.*]] +; CHECK: vector.early.exit: +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX1]], 
[[TMP8]] +; CHECK-NEXT: br label [[LOOP_END]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP1:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END]] ; CHECK: loop.inc: ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ -1, [[LOOP_INC]] ] +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP1]] ], [ -1, [[LOOP_INC]] ], [ -1, [[MIDDLE_BLOCK]] ], [ [[TMP9]], [[VECTOR_EARLY_EXIT]] ] ; CHECK-NEXT: ret i64 [[RETVAL]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll index 018e2c21..033907e 100644 --- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll +++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll @@ -4,9 +4,9 @@ ; We have 3 loops, two of them are vectorizable (with one being early-exit ; vectorized) and the third one is not. 
-; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization -; CHECK: 1 loop-vectorize - Number of early exit loops vectorized -; CHECK: 2 loop-vectorize - Number of loops vectorized +; CHECK: 4 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 2 loop-vectorize - Number of early exit loops vectorized +; CHECK: 3 loop-vectorize - Number of loops vectorized target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -15,19 +15,19 @@ entry: %cmp1 = icmp sle i64 %size, 0 %cmp21 = icmp sgt i64 0, %size %or.cond = or i1 %cmp1, %cmp21 - br i1 %or.cond, label %for.end, label %for.body + br i1 %or.cond, label %exit, label %loop -for.body: ; preds = %entry, %for.body - %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv2 +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %arrayidx = getelementptr inbounds float, ptr %a, i64 %iv %0 = load float, ptr %arrayidx, align 4 %mul = fmul float %0, %0 store float %mul, ptr %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 - %cmp2 = icmp sgt i64 %indvars.iv.next, %size - br i1 %cmp2, label %for.end, label %for.body + %iv.next = add nuw nsw i64 %iv, 1 + %cmp2 = icmp sgt i64 %iv.next, %size + br i1 %cmp2, label %exit, label %loop -for.end: ; preds = %entry, %for.body +exit: ; preds = %entry, %loop ret void } @@ -38,9 +38,9 @@ entry: call void @init_mem(ptr %p1, i64 1024) call void @init_mem(ptr %p2, i64 1024) %end.clamped = and i64 %end, 1023 - br label %for.body + br label %loop -for.body: +loop: %ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ] %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind %0 = load i32, ptr %arrayidx1, align 4 @@ -52,7 +52,7 @@ for.body: for.inc: %ind.next = add i64 %ind, 1 %cmp = icmp ult i64 %ind.next, %end.clamped - br i1 %cmp, label 
%for.body, label %exit + br i1 %cmp, label %loop, label %exit found: ret i32 1 @@ -66,25 +66,50 @@ entry: %cmp1 = icmp sle i64 %size, 0 %cmp21 = icmp sgt i64 0, %size %or.cond = or i1 %cmp1, %cmp21 - br i1 %or.cond, label %for.end, label %for.body + br i1 %or.cond, label %exit, label %loop -for.body: ; preds = %entry, %for.body - %indvars.iv2 = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - %0 = add nsw i64 %indvars.iv2, -5 +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %0 = add nsw i64 %iv, -5 %arrayidx = getelementptr inbounds float, ptr %a, i64 %0 %1 = load float, ptr %arrayidx, align 4 - %2 = add nsw i64 %indvars.iv2, 2 + %2 = add nsw i64 %iv, 2 %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %2 %3 = load float, ptr %arrayidx2, align 4 %mul = fmul float %1, %3 - %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %indvars.iv2 + %arrayidx4 = getelementptr inbounds float, ptr %a, i64 %iv store float %mul, ptr %arrayidx4, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 - %cmp2 = icmp sgt i64 %indvars.iv.next, %size - br i1 %cmp2, label %for.end, label %for.body + %iv.next = add nuw nsw i64 %iv, 1 + %cmp2 = icmp sgt i64 %iv.next, %size + br i1 %cmp2, label %exit, label %loop -for.end: ; preds = %entry, %for.body +exit: ret void } +define i1 @multiple_countable_exits_multiple_exit_block(ptr %A, ptr %B, i32 %N) { +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + %cond.0 = icmp eq i32 %iv, %N + br i1 %cond.0, label %exit.0, label %loop.latch + +loop.latch: + %A.gep = getelementptr inbounds i32, ptr %A, i32 %iv + %lv = load i32, ptr %A.gep, align 4 + %B.gep = getelementptr inbounds i32, ptr %B, i32 %iv + store i32 %lv, ptr %B.gep, align 4 + %iv.next = add nuw i32 %iv, 1 + %cond.1 = icmp ult i32 %iv.next, 1000 + br i1 %cond.1, label %loop.header, label %exit.1 + +exit.0: + ret i1 false + +exit.1: + ret i1 true +} + declare void @init_mem(ptr, i64); diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll b/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll new file mode 100644 index 0000000..b681ecdc --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/iterative_merge.ll @@ -0,0 +1,1103 @@ +;; Test for iterative node merging. This is an llvm-reduced version of the xalancbmk +;; benchmark with FullLTO and memprof. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \ +; RUN: -memprof-merge-iteration=false %s -S 2>&1 | FileCheck %s --check-prefix=NOITER + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \ +; RUN: -memprof-merge-iteration=true %s -S 2>&1 | FileCheck %s --check-prefix=ITER + +; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new -stats \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=ITER + +; NOITER-NOT: _ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb.memprof.2 +; NOITER: 7 memprof-context-disambiguation - Number of function clones created during whole program analysis +; NOITER: 1 memprof-context-disambiguation - Max merge iterations for nodes +; NOITER: 2 memprof-context-disambiguation - Number of new nodes created during merging + +; ITER: _ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb.memprof.2 +; ITER: 8 memprof-context-disambiguation - Number of function clones created during whole program analysis +; ITER: 3 memprof-context-disambiguation - Max merge iterations for nodes +; ITER: 3 memprof-context-disambiguation - Number of new nodes created during merging + +; ModuleID = 'reduced.bc' +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%"class.xercesc_2_5::XMLNumber" = type { %"class.xercesc_2_5::XMLEnumerator" } +%"class.xercesc_2_5::XMLEnumerator" = type { ptr 
} + +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_XPath.cpp, ptr null }] +@_ZTVN10xalanc_1_822FunctionNormalizeSpaceE = constant { [11 x ptr] } { [11 x ptr] [ptr null, ptr null, ptr null, ptr null, ptr null, ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeEPKN11xercesc_2_57LocatorE, ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeENS_10XObjectPtrEPKN11xercesc_2_57LocatorE, ptr null, ptr null, ptr null, ptr null] } +@_ZTVN10__cxxabiv121__vmi_class_type_infoE = constant { [10 x ptr] } zeroinitializer +@_ZTVN10__cxxabiv119__pointer_type_infoE = constant { [7 x ptr] } zeroinitializer +@_ZTVSt13bad_exception = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt13bad_exception, ptr @_ZNSt13bad_exceptionD1Ev, ptr null, ptr null] } +@_ZTISt13bad_exception = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception } +@_ZTISt9bad_alloc = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception } +@_ZTVSt8bad_cast = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt8bad_cast, ptr @_ZNSt8bad_castD1Ev, ptr null, ptr null] } +@_ZTVSt10bad_typeid = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt10bad_typeid, ptr @_ZNSt10bad_typeidD1Ev, ptr null, ptr null] } +@_ZTVN10__cxxabiv117__class_type_infoE = constant { [10 x ptr] } zeroinitializer +@_ZTISt8bad_cast = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception } +@_ZTVN10__cxxabiv120__si_class_type_infoE = constant { [10 x ptr] } zeroinitializer +@_ZTISt9exception = constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 
2), ptr null } +@_ZTISt10bad_typeid = constant { ptr, ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr null, ptr @_ZTISt9exception } +@_ZTVSt9exception = constant { [5 x ptr] } { [5 x ptr] [ptr null, ptr @_ZTISt9exception, ptr @_ZNSt9exceptionD2Ev, ptr null, ptr null] } + +@_ZN10xalanc_1_814XalanDOMStringC1EPKcj = alias void (ptr, ptr, i32), ptr @_ZN10xalanc_1_814XalanDOMStringC2EPKcj +@_Znwm = alias ptr (i64), ptr @TCMallocInternalNew +@_ZdlPvm = alias void (ptr, i64), ptr @TCMallocInternalDeleteSized +@_Znam = alias ptr (i64), ptr @TCMallocInternalNew +@_ZdaPv = alias void (ptr), ptr @TCMallocInternalDelete +@_ZdlPv = alias void (ptr), ptr @TCMallocInternalDelete +@_ZnwmRKSt9nothrow_t = alias ptr (i64, ptr), ptr @TCMallocInternalNewNothrow +@_ZnamRKSt9nothrow_t = alias ptr (i64, ptr), ptr @TCMallocInternalNewNothrow +@_ZdlPvRKSt9nothrow_t = alias void (ptr, ptr), ptr @TCMallocInternalDelete +@_ZdaPvRKSt9nothrow_t = alias void (ptr, ptr), ptr @TCMallocInternalDelete +@_ZnwmSt11align_val_t = alias ptr (i64, i64), ptr @TCMallocInternalNewAligned +@_ZnwmSt11align_val_tRKSt9nothrow_t = alias ptr (i64, i64, ptr), ptr @TCMallocInternalNewAlignedNothrow +@_ZdlPvSt11align_val_t = alias void (ptr, i64), ptr @TCMallocInternalDelete +@_ZdlPvSt11align_val_tRKSt9nothrow_t = alias void (ptr, i64, ptr), ptr @TCMallocInternalDelete +@_ZdlPvmSt11align_val_t = alias void (ptr, i64, i64), ptr @TCMallocInternalDeleteSizedAligned +@_ZnamSt11align_val_t = alias ptr (i64, i64), ptr @TCMallocInternalNewAligned +@_ZnamSt11align_val_tRKSt9nothrow_t = alias ptr (i64, i64, ptr), ptr @TCMallocInternalNewAlignedNothrow +@_ZdaPvSt11align_val_t = alias void (ptr, i64), ptr @TCMallocInternalDelete +@_ZdaPvSt11align_val_tRKSt9nothrow_t = alias void (ptr, i64, ptr), ptr @TCMallocInternalDelete +@_ZdaPvmSt11align_val_t = alias void (ptr, i64, i64), ptr @TCMallocInternalDeleteSizedAligned +@_ZNSt13exception_ptrD1Ev = alias void (ptr), ptr 
@_ZNSt13exception_ptrD2Ev +@_ZNSt13exception_ptrC1ERKS_ = alias void (ptr, ptr), ptr @_ZNSt13exception_ptrC2ERKS_ +@_ZNSt13bad_exceptionD1Ev = alias void (ptr), ptr @_ZNSt9exceptionD2Ev +@_ZNSt8bad_castD1Ev = alias void (ptr), ptr @_ZNSt8bad_castD2Ev +@_ZNSt10bad_typeidD1Ev = alias void (ptr), ptr @_ZNSt10bad_typeidD2Ev + +define ptr @_ZNSt3__u6vectorItNS_9allocatorItEEE7reserveEm() { + %1 = tail call ptr @_Znwm(i64 0), !memprof !29, !callsite !592 + ret ptr %1 +} + +; Function Attrs: cold +declare void @_ZN10xalanc_1_88FunctionC2Ev() #0 + +define void @_ZN10xalanc_1_812FunctionLangC2Ev() { + call void @_ZN10xalanc_1_88FunctionC2Ev() + call void @_ZN10xalanc_1_814XalanDOMStringC1EPKcj(ptr null, ptr null, i32 0), !callsite !593 + ret void +} + +define void @_ZN10xalanc_1_822FunctionNormalizeSpaceC2Ev(ptr %0) { + store ptr @_ZTVN10xalanc_1_822FunctionNormalizeSpaceE, ptr %0, align 8 + ret void +} + +define void @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeEPKN11xercesc_2_57LocatorE() { + call void @_ZN10xalanc_1_818XalanMessageLoader10getMessageENS_13XalanMessages5CodesEPKcS4_S4_S4_() + ret void +} + +define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace7executeERNS_21XPathExecutionContextEPNS_9XalanNodeENS_10XObjectPtrEPKN11xercesc_2_57LocatorE() { + %1 = call ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_10XObjectPtrE() + ret ptr %1 +} + +define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_10XObjectPtrE() { + %1 = load ptr, ptr null, align 8 + %2 = getelementptr i8, ptr %1, i64 72 + %3 = load ptr, ptr %2, align 8 + %4 = tail call ptr %3(ptr null) + %5 = call ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_14XalanDOMStringE() + ret ptr %5 +} + +define ptr @_ZNK10xalanc_1_822FunctionNormalizeSpace9normalizeERNS_21XPathExecutionContextERKNS_14XalanDOMStringE() { + %1 = call ptr 
@_ZNSt3__u6vectorItNS_9allocatorItEEE7reserveEm() + ret ptr %1 +} + +declare i64 @mbstowcs() + +define void @_GLOBAL__sub_I_XPath.cpp() { + tail call void @_ZN10xalanc_1_818XPathFunctionTableC2Eb() + ret void +} + +define void @_ZN10xalanc_1_818XPathFunctionTableC2Eb() { + call void @_ZN10xalanc_1_818XPathFunctionTable11CreateTableEv() + ret void +} + +define void @_ZN10xalanc_1_818XPathFunctionTable11CreateTableEv() { + %1 = alloca %"class.xercesc_2_5::XMLNumber", align 8 + call void @_ZN10xalanc_1_812FunctionLangC2Ev() + call void @_ZN10xalanc_1_822FunctionNormalizeSpaceC2Ev(ptr %1) + ret void +} + +define void @_ZN10xalanc_1_814XalanDOMStringC2EPKcj(ptr %0, ptr %1, i32 %2) #1 { + %4 = call ptr @_ZN10xalanc_1_814XalanDOMString6appendEPKcj(ptr %0, ptr %1, i32 %2), !callsite !594 + ret void +} + +; Function Attrs: cold +define ptr @_ZN10xalanc_1_814XalanDOMString6appendEPKcj(ptr %0, ptr %1, i32 %2) #0 { + %4 = load i32, ptr %0, align 8 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %common.ret, label %6 + +common.ret: ; preds = %3 + tail call fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %1, i32 %2, ptr %0, i1 true), !callsite !595 + ret ptr %0 + +6: ; preds = %3 + call fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr null, i32 1, ptr null, i1 false) + unreachable +} + +define fastcc void @_ZN10xalanc_1_8L11doTranscodeEPKcjRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, i32 %1, ptr %2, i1 %3) !prof !596 { + %5 = icmp eq i32 %1, 1 + br i1 %5, label %6, label %9 + +6: ; preds = %4 + %7 = call fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr %2, i1 %3) + br i1 %7, label %11, label %8 + +8: ; preds = %6 + ret void + +9: ; preds = %4 + %10 = call fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr null, i1 false), !callsite !597 + br label %11 + +11: ; preds = %9, %6 
+ ret void +} + +define fastcc i1 @_ZN10xalanc_1_8L28doTranscodeFromLocalCodePageEPKcjbRNSt3__u6vectorItNS2_9allocatorItEEEEb(ptr %0, ptr %1, i1 %2) { + %4 = icmp eq ptr %0, null + br i1 %4, label %5, label %7 + +5: ; preds = %3 + %6 = load i64, ptr %1, align 8 + %cond = icmp eq i64 %6, 0 + ret i1 %cond + +7: ; preds = %3 + %8 = call i64 @mbstowcs() + %9 = zext i1 %2 to i64 + call void @_ZNSt3__u6vectorIwNS_9allocatorIwEEE8__appendEm(), !callsite !598 + ret i1 false +} + +define void @_ZNSt3__u6vectorIwNS_9allocatorIwEEE8__appendEm() { + %1 = tail call ptr @_Znwm(i64 0), !memprof !599, !callsite !768 + ret void +} + +; Function Attrs: cold +define void @_ZN10xalanc_1_826XalanInMemoryMessageLoaderC2Ev() #0 { + call void @_ZN10xalanc_1_814XalanDOMStringC1EPKcj(ptr null, ptr null, i32 0), !callsite !769 + ret void +} + +define void @_ZN10xalanc_1_818XalanMessageLoader12createLoaderEv() { + %1 = tail call ptr @_Znwm(i64 0) + call void @_ZN10xalanc_1_826XalanInMemoryMessageLoaderC2Ev(), !callsite !770 + ret void +} + +define void @_ZN10xalanc_1_818XalanMessageLoader10getMessageENS_13XalanMessages5CodesEPKcS4_S4_S4_() { + tail call void @_ZN10xalanc_1_818XalanMessageLoader12createLoaderEv() + ret void +} + +define void @TCMallocInternalDeleteSized() { + ret void +} + +; Function Attrs: nobuiltin noinline +define ptr @TCMallocInternalNew(i64 %0) #2 { + ret ptr null +} + +define void @TCMallocInternalDelete() { + ret void +} + +define i64 @TCMallocInternalNewNothrow() { + ret i64 0 +} + +define i64 @TCMallocInternalNewAligned() { + ret i64 0 +} + +define i64 @TCMallocInternalNewAlignedNothrow() { + ret i64 0 +} + +define void @TCMallocInternalDeleteSizedAligned() { + ret void +} + +define i1 @_ZSt18uncaught_exceptionv() { + ret i1 false +} + +define void @_ZNSt13exception_ptrD2Ev() { + ret void +} + +define void @_ZNSt13exception_ptrC2ERKS_() { + ret void +} + +define ptr @_ZNSt13exception_ptraSERKS_() { + ret ptr null +} + +define void 
@_ZSt17rethrow_exceptionSt13exception_ptr() { + unreachable +} + +define void @_ZSt17__throw_bad_allocv() { + unreachable +} + +define void @__cxa_bad_cast() { + unreachable +} + +define ptr @__cxa_allocate_exception() { + ret ptr null +} + +define ptr @__cxa_begin_catch() { + ret ptr null +} + +define void @__cxa_free_exception() { + ret void +} + +define void @__cxa_throw() { + unreachable +} + +define void @__cxa_end_catch() { + ret void +} + +define ptr @__cxa_current_exception_type() { + ret ptr null +} + +define void @__cxa_rethrow() { + ret void +} + +define void @_ZSt9terminatev() { + ret void +} + +define i32 @__gxx_personality_v0() { + ret i32 0 +} + +define void @__cxa_call_unexpected() { + ret void +} + +define ptr @__dynamic_cast() { + ret ptr null +} + +define void @_ZNSt9exceptionD2Ev() { + ret void +} + +define void @_ZNSt8bad_castD2Ev() { + ret void +} + +define void @_ZNSt10bad_typeidD2Ev() { + ret void +} + +attributes #0 = { cold } +attributes #1 = { "target-features"="+aes" } +attributes #2 = { nobuiltin noinline } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 331263925478} +!4 = !{!"MaxCount", i64 89521949747} +!5 = !{!"MaxInternalCount", i64 89521949747} +!6 = !{!"MaxFunctionCount", i64 14842374247} +!7 = !{!"NumCounts", i64 80529} +!8 = !{!"NumFunctions", i64 13237} +!9 = !{!"IsPartialProfile", i64 0} +!10 = !{!"PartialProfileRatio", double 0.000000e+00} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28} +!13 = !{i32 10000, i64 89521949747, i32 1} +!14 = !{i32 100000, i64 89521949747, i32 1} +!15 = !{i32 200000, i64 89521949747, i32 1} +!16 = !{i32 300000, i64 89454229684, i32 2} +!17 = !{i32 400000, i64 89454229684, i32 2} +!18 = !{i32 500000, i64 89454229684, i32 2} +!19 = !{i32 600000, i64 28686354153, i32 3} +!20 = !{i32 700000, 
i64 12169900676, i32 5} +!21 = !{i32 800000, i64 2585869019, i32 9} +!22 = !{i32 900000, i64 1189366531, i32 32} +!23 = !{i32 950000, i64 137116556, i32 82} +!24 = !{i32 990000, i64 24641624, i32 286} +!25 = !{i32 999000, i64 832911, i32 881} +!26 = !{i32 999900, i64 110792, i32 1739} +!27 = !{i32 999990, i64 20910, i32 2245} +!28 = !{i32 999999, i64 650, i32 2817} +!29 = !{!30, !32, !34, !36, !38, !40, !42, !44, !46, !48, !50, !52, !54, !56, !58, !60, !62, !64, !66, !68, !70, !72, !74, !76, !78, !80, !82, !84, !86, !88, !90, !92, !94, !96, !98, !100, !102, !104, !106, !108, !110, !112, !114, !116, !118, !120, !122, !124, !126, !128, !130, !132, !134, !136, !138, !140, !142, !144, !146, !148, !150, !152, !154, !156, !158, !160, !162, !164, !166, !168, !170, !172, !174, !176, !178, !180, !182, !184, !186, !188, !190, !192, !194, !196, !198, !200, !202, !204, !206, !208, !210, !212, !214, !216, !218, !220, !222, !224, !226, !228, !230, !232, !234, !236, !238, !240, !242, !244, !246, !248, !250, !252, !254, !256, !258, !260, !262, !264, !266, !268, !270, !272, !274, !276, !278, !280, !282, !284, !286, !288, !290, !292, !294, !296, !298, !300, !302, !304, !306, !308, !310, !312, !314, !316, !318, !320, !322, !324, !326, !328, !330, !332, !334, !336, !338, !340, !342, !344, !346, !348, !350, !352, !354, !356, !358, !360, !362, !364, !366, !368, !370, !372, !374, !376, !378, !380, !382, !384, !386, !388, !390, !392, !394, !396, !398, !400, !402, !404, !406, !408, !410, !412, !414, !416, !418, !420, !422, !424, !426, !428, !430, !432, !434, !436, !438, !440, !442, !444, !446, !448, !450, !452, !454, !456, !458, !460, !462, !464, !466, !468, !470, !472, !474, !476, !478, !480, !482, !484, !486, !488, !490, !492, !494, !496, !498, !500, !502, !504, !506, !508, !510, !512, !514, !516, !518, !520, !522, !524, !526, !528, !530, !532, !534, !536, !538, !540, !542, !544, !546, !548, !550, !552, !554, !556, !558, !560, !562, !564, !566, !568, !570, !572, !574, !576, !578, !580, 
!582, !584, !586, !588, !590} +!30 = !{!31, !"cold"} +!31 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 6029234927090217085} +!32 = !{!33, !"notcold"} +!33 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -4247704025117397876, i64 7919823637914964443, i64 2053928606242451379, i64 -5479684209450392625, i64 4312698517630782220, i64 5379466077518675850} +!34 = !{!35, !"cold"} +!35 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -4247704025117397876, i64 7919823637914964443, i64 2053928606242451379, i64 -5479684209450392625, i64 4312698517630782220, i64 -1805555115991223293} +!36 = !{!37, !"cold"} +!37 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -8053384441926065153, i64 -2506951929422432416, i64 -8699079470974299286, i64 -2032085148702428395} +!38 = !{!39, !"cold"} +!39 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!40 = !{!41, !"cold"} +!41 = !{i64 761518489666860826, i64 -1420336805534834351, 
i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!42 = !{!43, !"notcold"} +!43 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!44 = !{!45, !"cold"} +!45 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 2493577091211980627, i64 2339589718150484619, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 8647084438977525618} +!46 = !{!47, !"cold"} +!47 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 6798683962384280640} +!48 = !{!49, !"cold"} +!49 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 
3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400} +!50 = !{!51, !"cold"} +!51 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!52 = !{!53, !"cold"} +!53 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 8647084438977525618, i64 -1079892354093417124, i64 6798683962384280640} +!54 = !{!55, !"cold"} +!55 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400} +!56 = !{!57, !"notcold"} +!57 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!58 = !{!59, !"cold"} +!59 = 
!{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 3310541634398918400, i64 3849307506121682135, i64 -1079892354093417124, i64 6798683962384280640} +!60 = !{!61, !"cold"} +!61 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -6416746930642965881, i64 -1079892354093417124, i64 -1079892354093417124, i64 6798683962384280640} +!62 = !{!63, !"notcold"} +!63 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -6416746930642965881, i64 -1079892354093417124, i64 -1079892354093417124, i64 -1079892354093417124} +!64 = !{!65, !"cold"} +!65 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4456634289032362696, i64 -395897789902236628, i64 5224670244285114447, i64 8364530757130553949, i64 -1126364985256767411, i64 2339589718150484619, i64 -1079892354093417124, i64 -1079892354093417124} +!66 = !{!67, !"cold"} +!67 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 
-5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!68 = !{!69, !"cold"} +!69 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!70 = !{!71, !"cold"} +!71 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 
-4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!72 = !{!73, !"cold"} +!73 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!74 = !{!75, !"cold"} +!75 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!76 = !{!77, !"cold"} +!77 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!78 = !{!79, !"cold"} +!79 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!80 = !{!81, !"cold"} +!81 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!82 = !{!83, !"cold"} +!83 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!84 = !{!85, !"cold"} +!85 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!86 = !{!87, !"cold"} +!87 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 
5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!88 = !{!89, !"cold"} +!89 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!90 = !{!91, !"cold"} +!91 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!92 = !{!93, !"cold"} +!93 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!94 = !{!95, !"cold"} +!95 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -8823323827256289214} +!96 = !{!97, !"cold"} +!97 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!98 = !{!99, !"cold"} +!99 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!100 = !{!101, !"cold"} +!101 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!102 = !{!103, !"cold"} +!103 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!104 = !{!105, !"cold"} +!105 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!106 = !{!107, !"cold"} +!107 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!108 = !{!109, !"cold"} +!109 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!110 = !{!111, !"cold"} +!111 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!112 = !{!113, !"cold"} +!113 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 
2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!114 = !{!115, !"cold"} +!115 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 
-8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!116 = !{!117, !"cold"} +!117 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!118 = !{!119, !"cold"} +!119 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!120 = !{!121, !"cold"} +!121 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!122 = !{!123, !"cold"} +!123 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 288445990964680341, i64 -8648384467214739297, i64 -2384299894849227688, i64 3639317425425392532, i64 5072014983283348683, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350} +!124 = 
!{!125, !"cold"} +!125 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 703544601638968040} +!126 = !{!127, !"cold"} +!127 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 3608551295493976071} +!128 = !{!129, !"cold"} +!129 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 4358851737059049324} +!130 = !{!131, !"cold"} +!131 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 5183932555433262029} +!132 = !{!133, !"notcold"} +!133 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7543168268883704429, i64 3717163490267262493} +!134 = !{!135, !"cold"} +!135 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7543168268883704429, i64 6956871998514654501} +!136 = !{!137, !"cold"} +!137 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7622038321347520920} +!138 = !{!139, !"cold"} +!139 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 7923736392933351814} +!140 = !{!141, !"cold"} +!141 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 8786684249512079407} +!142 = !{!143, !"cold"} +!143 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200} +!144 = !{!145, !"notcold"} +!145 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565} +!146 = !{!147, !"cold"} +!147 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370} +!148 = !{!149, 
!"cold"} +!149 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -4578348703977238664} +!150 = !{!151, !"cold"} +!151 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -3818321239655607597} +!152 = !{!153, !"cold"} +!153 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -8323794275797098367, i64 9067217415982793418, i64 -8980909613737481067, i64 -982712266534864075, i64 -5252032392769887959, i64 -4190487729767247227, i64 1665128429432451888, i64 7065428941432465507, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -380569784870776951} +!154 = !{!155, !"cold"} +!155 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 
2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200} +!156 = !{!157, !"cold"} +!157 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565} +!158 = !{!159, !"notcold"} +!159 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370} +!160 = !{!161, !"cold"} +!161 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 1591808021703355378, i64 2178145963970842528, i64 1947741732663711851, i64 -3818321239655607597} +!162 = !{!163, !"cold"} +!163 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 
5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 5227238232120559200} +!164 = !{!165, !"cold"} +!165 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 6440225172503051565} +!166 = !{!167, !"notcold"} +!167 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 -8676112728238134370} +!168 = !{!169, !"cold"} +!169 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6850731039037719840, i64 -9108486118803861691, i64 7441112283992263798, i64 5436097384034253767, i64 7990371184183621520, i64 3919193733365087207, i64 2213936510646135043, i64 -6569365957789783611, i64 5685037928165748194, i64 2178145963970842528, i64 1947741732663711851, i64 -4578348703977238664} +!170 = !{!171, !"cold"} +!171 = !{i64 
761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6599978295092623218} +!172 = !{!173, !"cold"} +!173 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6580674910425543918} +!174 = !{!175, !"cold"} +!175 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -6003516630083934328} +!176 = !{!177, !"cold"} +!177 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -725604301669741756} +!178 = !{!179, !"cold"} +!179 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 1047178367295653512, i64 7589111283956938786} +!180 = !{!181, !"cold"} +!181 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 
1664659532151860180, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -8044437084415731706} +!182 = !{!183, !"notcold"} +!183 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 5991232955697150686, i64 -528103690324761532, i64 -7038881423600967439, i64 1457427447070478076, i64 -6391461151347487032, i64 -7188595821817891258, i64 -5089627249000618877, i64 1595244419265827813, i64 -5230206178376217032, i64 9110891150749782890, i64 1664659532151860180, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -4180453512466417651} +!184 = !{!185, !"notcold"} +!185 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -6616032301308523050, i64 1047178367295653512} +!186 = !{!187, !"cold"} +!187 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -6616032301308523050, i64 1664659532151860180} +!188 = !{!189, !"cold"} +!189 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 2314936267722354705} +!190 = !{!191, !"cold"} +!191 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 4570902236181266557} +!192 = !{!193, !"notcold"} +!193 = !{i64 761518489666860826, i64 
-1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -4220077066789644587, i64 7630274881348409761, i64 -5242964645367079043} +!194 = !{!195, !"cold"} +!195 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 3761334221289231340, i64 7635330182073413592, i64 -364806006548699073} +!196 = !{!197, !"cold"} +!197 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 4237282366048505309} +!198 = !{!199, !"cold"} +!199 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!200 = !{!201, !"cold"} +!201 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!202 = !{!203, !"cold"} +!203 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!204 = !{!205, !"cold"} +!205 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!206 = !{!207, !"cold"} +!207 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!208 = 
!{!209, !"cold"} +!209 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!210 = !{!211, !"cold"} +!211 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!212 = !{!213, !"cold"} +!213 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!214 = !{!215, !"cold"} +!215 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!216 = !{!217, !"cold"} +!217 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 
2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!218 = !{!219, !"cold"} +!219 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!220 = 
!{!221, !"cold"} +!221 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!222 = !{!223, !"cold"} +!223 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!224 = !{!225, !"cold"} +!225 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!226 = !{!227, !"cold"} +!227 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!228 = !{!229, !"notcold"} +!229 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 2456243970925072761, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350} +!230 = !{!231, !"cold"} +!231 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 
-8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!232 = !{!233, !"notcold"} +!233 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 
4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148} +!234 = !{!235, !"cold"} +!235 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 
4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696} +!236 = !{!237, !"cold"} +!237 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!238 = !{!239, !"cold"} +!239 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!240 = !{!241, !"cold"} +!241 = !{i64 761518489666860826, 
i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!242 = !{!243, !"cold"} +!243 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 
-4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!244 = !{!245, !"cold"} +!245 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!246 = !{!247, !"cold"} +!247 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!248 = !{!249, !"cold"} +!249 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!250 = !{!251, !"cold"} +!251 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!252 = !{!253, !"cold"} +!253 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 
-4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!254 = !{!255, !"cold"} +!255 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 
-4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!256 = !{!257, !"cold"} +!257 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!258 = !{!259, !"cold"} +!259 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!260 = !{!261, !"cold"} +!261 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 
-4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!262 = !{!263, !"cold"} +!263 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!264 = !{!265, !"cold"} +!265 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!266 = !{!267, !"cold"} +!267 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!268 = !{!269, !"cold"} +!269 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!270 = !{!271, !"cold"} +!271 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!272 = !{!273, !"cold"} +!273 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!274 = !{!275, !"cold"} +!275 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 
-4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!276 = !{!277, !"cold"} +!277 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 -4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!278 = !{!279, !"notcold"} +!279 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -7408386176617163641, i64 -4620509665591411514, i64 
-4971727196067614, i64 -9084354509075557430, i64 -5657724779880519508, i64 -5790323689271594480, i64 -4719891145283076059, i64 3187910181892717895, i64 8259857497759158849, i64 2561452775733345351, i64 -8882547524614770599, i64 -7413617718359238186, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 
3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350} +!280 = !{!281, !"cold"} +!281 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 5418863195168147540, i64 -2230507569891602201} +!282 = !{!283, !"cold"} +!283 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -8991518728647676517} 
+!284 = !{!285, !"cold"} +!285 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -7241526169985634471} +!286 = !{!287, !"notcold"} +!287 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 2654783664530695739, i64 -8629883790003563202, i64 -923725743350185774, i64 2929907956083581648, i64 -2070608964811380293} +!288 = !{!289, !"notcold"} +!289 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 3898931366823636439} +!290 = !{!291, !"cold"} +!291 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 6718910740018369967} +!292 = !{!293, !"cold"} +!293 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 5795517037440084991, i64 8934449472348526968} +!294 = !{!295, !"cold"} +!295 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 
5795517037440084991, i64 -1796035955371760228} +!296 = !{!297, !"notcold"} +!297 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 5227238232120559200} +!298 = !{!299, !"cold"} +!299 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 -8676112728238134370} +!300 = !{!301, !"cold"} +!301 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7768164617256106301, i64 -5097561514079731295} +!302 = !{!303, !"notcold"} +!303 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7845428890017607803, i64 3791151357907954961, i64 -4282486660539596674, i64 2873405805561972602, i64 3863349475384737601, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550} +!304 = !{!305, !"cold"} +!305 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 7845428890017607803, i64 3791151357907954961, i64 -4282486660539596674, i64 2873405805561972602, i64 3863349475384737601, i64 2509678254863108051, i64 927100692576121148, i64 -642697777073794173} +!306 = !{!307, !"cold"} +!307 = !{i64 761518489666860826, i64 
-1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 -6175606193227481616} +!308 = !{!309, !"cold"} +!309 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 1856492280661618760, i64 -2159124216309770955} +!310 = !{!311, !"notcold"} +!311 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 3430064890274741042} +!312 = !{!313, !"cold"} +!313 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 -7269278260587173790} +!314 = !{!315, !"cold"} +!315 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 6452974616915078953, i64 -4180453512466417651} +!316 = !{!317, !"cold"} +!317 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -6517003774656365154, i64 -3601339536116888955, i64 -3425723542151985551, i64 -1861972594052659404} +!318 = !{!319, !"cold"} +!319 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 5390968037314166465} +!320 = 
!{!321, !"cold"} +!321 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 7144453710359912936} +!322 = !{!323, !"cold"} +!323 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -7099026173386863432} +!324 = !{!325, !"cold"} +!325 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -5247604975316055689} +!326 = !{!327, !"cold"} +!327 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -4068062742094437340} +!328 = !{!329, !"cold"} +!329 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 5227238232120559200} +!330 = !{!331, !"cold"} +!331 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 -4578348703977238664} +!332 = !{!333, !"cold"} +!333 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3834757517663654734, i64 -3818321239655607597} +!334 = !{!335, !"cold"} +!335 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, 
i64 2786152373261539699} +!336 = !{!337, !"cold"} +!337 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 4901895946974703269} +!338 = !{!339, !"cold"} +!339 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -5529974889548515387} +!340 = !{!341, !"cold"} +!341 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 1236134833646061117} +!342 = !{!343, !"cold"} +!343 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042} +!344 = !{!345, !"cold"} +!345 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 -8044437084415731706} +!346 = !{!347, !"cold"} 
+!347 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 7589111283956938786, i64 -4015860425916992153, i64 -6574691243059686550, i64 -865315721850786456, i64 -4343166111045357943, i64 -9170346855404441874, i64 766214442491134059} +!348 = !{!349, !"cold"} +!349 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 7906170180224855390, i64 7589111283956938786, i64 -4015860425916992153, i64 -6574691243059686550, i64 -865315721850786456, i64 -4343166111045357943, i64 -9170346855404441874, i64 -8321193460161544571} +!350 = !{!351, !"cold"} +!351 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 1249779207984631657, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042} +!352 = !{!353, !"cold"} +!353 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 1249779207984631657, i64 7589111283956938786} +!354 = !{!355, !"cold"} +!355 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 
3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 2763817770009354222, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042} +!356 = !{!357, !"cold"} +!357 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 3213891655075030301} +!358 = !{!359, !"cold"} +!359 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -9039914273107794752, i64 5180037934435350253, i64 7590028107620240394, i64 4269093341531506576, i64 3430064890274741042} +!360 = !{!361, !"cold"} +!361 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -5356510825827839560, i64 1664659532151860180, i64 5180037934435350253} +!362 = !{!363, !"cold"} +!363 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -8186310573132095759, i64 -3801963940567328533} +!364 = !{!365, !"cold"} +!365 = 
!{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 4264643968589219398, i64 -1041761292742627790, i64 -1190723991431075531} +!366 = !{!367, !"cold"} +!367 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 5375458305822857010, i64 296143722720000868} +!368 = !{!369, !"cold"} +!369 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 7093742121589842512, i64 -5755277970707044392} +!370 = !{!371, !"cold"} +!371 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 8652501918564778285, i64 -5755277970707044392} +!372 = !{!373, !"cold"} +!373 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!374 = !{!375, !"cold"} +!375 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!376 = !{!377, !"cold"} +!377 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, 
i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!378 = !{!379, !"cold"} +!379 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!380 = !{!381, !"cold"} +!381 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!382 = !{!383, !"cold"} +!383 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!384 = !{!385, !"cold"} +!385 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!386 = !{!387, !"cold"} +!387 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!388 = !{!389, !"cold"} +!389 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!390 = !{!391, !"cold"} +!391 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 
4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!392 = !{!393, !"cold"} +!393 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!394 = !{!395, !"cold"} +!395 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!396 = !{!397, !"cold"} +!397 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!398 = !{!399, !"cold"} +!399 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 
4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!400 = !{!401, !"cold"} +!401 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 
4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!402 = !{!403, !"cold"} +!403 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 
4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!404 = !{!405, !"cold"} +!405 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 
4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-8823323827256289214} +!406 = !{!407, !"cold"} +!407 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 
-6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 
-2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148} +!408 = !{!409, !"cold"} +!409 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!410 = !{!411, !"cold"} +!411 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!412 = !{!413, !"cold"} +!413 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!414 = !{!415, !"cold"} +!415 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 
2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214, i64 -2809205627667035257, i64 -3454968005337915156, i64 5332944666593238327, i64 2118523074633187269, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 2535919208235830696, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 -3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148, i64 -3434822233783588550, i64 4936683088593976443, i64 
-3971271186007926312, i64 -2850591094085675928, i64 -5159413647374657479, i64 -4910767396033570911, i64 -2336071731736274914, i64 -3286861646405277101, i64 4242887956499846691, i64 2509678254863108051, i64 927100692576121148} +!416 = !{!417, !"cold"} +!417 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!418 = !{!419, !"cold"} +!419 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!420 = !{!421, !"cold"} +!421 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!422 = !{!423, !"cold"} +!423 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 
-6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!424 = !{!425, !"cold"} +!425 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!426 = !{!427, !"cold"} +!427 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!428 = !{!429, !"cold"} +!429 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 
-6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -8823323827256289214} +!430 = !{!431, !"cold"} +!431 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 4034922888097517058, i64 2189128933532668425, i64 1118136747583788720, i64 -6884417272332536290, i64 -382528736566041484, i64 4346814329690051411, i64 -4529885655313035366, i64 3326028940429496022, i64 -1802335027011573071, i64 -7599184525564588054, i64 2509678254863108051, i64 4031363116508023010, i64 -1510405904543134799, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 
-2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350, i64 3189374357398009742, i64 2118523074633187269, i64 -4004529337280035823, i64 -2648139195504863883, i64 2509678254863108051, i64 -6736971639865489350} +!432 = !{!433, !"cold"} +!433 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 9139250784910037233, i64 -3507141864672488895, i64 6409510268436951296, i64 3865292116923879613, i64 6531269350744799067} +!434 = !{!435, !"cold"} +!435 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 4501820981166842392, i64 -3569839323322692552, i64 -3830758595982508384, i64 2017594165120768407, i64 -8512453591954350037} +!436 = !{!437, !"cold"} +!437 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
4501820981166842392, i64 -3569839323322692552, i64 -529266326168624863} +!438 = !{!439, !"cold"} +!439 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 75847647400915869} +!440 = !{!441, !"notcold"} +!441 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 183044117821689400} +!442 = !{!443, !"cold"} +!443 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 448612777694345181} +!444 = !{!445, !"cold"} +!445 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 885980778253288927} +!446 = !{!447, !"cold"} +!447 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1180177230914639611} +!448 = !{!449, !"cold"} +!449 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, 
i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1494295413782701148} +!450 = !{!451, !"cold"} +!451 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1659406033211294585} +!452 = !{!453, !"cold"} +!453 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1746300563922301711} +!454 = !{!455, !"cold"} +!455 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1838331058072364305} +!456 = !{!457, !"cold"} +!457 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1955855015757903991} +!458 = !{!459, !"cold"} +!459 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2061170524869170355} +!460 = !{!461, !"cold"} +!461 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 
-3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2063820668743361132} +!462 = !{!463, !"cold"} +!463 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2165533455721301492} +!464 = !{!465, !"cold"} +!465 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2244685673087294926} +!466 = !{!467, !"cold"} +!467 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3034209063447837014} +!468 = !{!469, !"cold"} +!469 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3036500738208828614} +!470 = !{!471, !"cold"} +!471 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3377720129330375308} +!472 = !{!473, !"cold"} +!473 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, 
i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3706881338225322912} +!474 = !{!475, !"cold"} +!475 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3782560208401211919} +!476 = !{!477, !"cold"} +!477 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3863666838850843735} +!478 = !{!479, !"cold"} +!479 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4283301159808000566} +!480 = !{!481, !"cold"} +!481 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4384706011107298227} +!482 = !{!483, !"cold"} +!483 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4607155140475180580} +!484 = !{!485, !"cold"} +!485 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 
3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4714747246633738682} +!486 = !{!487, !"cold"} +!487 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4723652453916328978} +!488 = !{!489, !"cold"} +!489 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4727386874466965794} +!490 = !{!491, !"cold"} +!491 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5253938656018120605} +!492 = !{!493, !"cold"} +!493 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5514184169544971646} +!494 = !{!495, !"cold"} +!495 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5608420749997639494} +!496 = !{!497, !"cold"} +!497 = !{i64 761518489666860826, i64 -1420336805534834351, 
i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5673887548322881439} +!498 = !{!499, !"cold"} +!499 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6431479059631025437} +!500 = !{!501, !"cold"} +!501 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6478461706098589874} +!502 = !{!503, !"cold"} +!503 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6516605682960517029} +!504 = !{!505, !"cold"} +!505 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6573556954419869084} +!506 = !{!507, !"cold"} +!507 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6616057203216317315} +!508 = !{!509, !"cold"} +!509 = !{i64 
761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7654798960227041366} +!510 = !{!511, !"cold"} +!511 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8203893749632590951} +!512 = !{!513, !"cold"} +!513 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8205991499743838669} +!514 = !{!515, !"cold"} +!515 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8731033915056759998} +!516 = !{!517, !"cold"} +!517 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9004589987493894324} +!518 = !{!519, !"cold"} +!519 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9163289110187211963} +!520 = 
!{!521, !"cold"} +!521 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9194683874556492116} +!522 = !{!523, !"cold"} +!523 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8575720568240258506} +!524 = !{!525, !"cold"} +!525 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8058981938237464077} +!526 = !{!527, !"cold"} +!527 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7589260736264366062} +!528 = !{!529, !"cold"} +!529 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6900734672072851702} +!530 = !{!531, !"cold"} +!531 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, 
i64 -6409112906345845960} +!532 = !{!533, !"cold"} +!533 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5965768366903796804} +!534 = !{!535, !"cold"} +!535 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5597465602586680838} +!536 = !{!537, !"cold"} +!537 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5417805719047465045} +!538 = !{!539, !"cold"} +!539 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5086634999161749506} +!540 = !{!541, !"cold"} +!541 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4597440142749257264} +!542 = !{!543, !"cold"} +!543 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 
8922170082039419050, i64 -2395126913894468764, i64 -4451114404581865651} +!544 = !{!545, !"cold"} +!545 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3770505485117883193} +!546 = !{!547, !"cold"} +!547 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3615756978728100936} +!548 = !{!549, !"cold"} +!549 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3487944172906860150} +!550 = !{!551, !"cold"} +!551 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3343380507450452769} +!552 = !{!553, !"cold"} +!553 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2513421463471446585} +!554 = !{!555, !"cold"} +!555 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 
-281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2457105650992547275} +!556 = !{!557, !"cold"} +!557 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2303844962102880850} +!558 = !{!559, !"cold"} +!559 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2234974922194715802} +!560 = !{!561, !"cold"} +!561 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2202950085962554431} +!562 = !{!563, !"cold"} +!563 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2188352265869670772} +!564 = !{!565, !"cold"} +!565 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1875947641213642418} +!566 = !{!567, !"cold"} +!567 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 
6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1820129202923097948} +!568 = !{!569, !"cold"} +!569 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1373019485205162581} +!570 = !{!571, !"cold"} +!571 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1152657617489033068} +!572 = !{!573, !"cold"} +!573 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1141827363237952836} +!574 = !{!575, !"cold"} +!575 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1018440817185542489} +!576 = !{!577, !"cold"} +!577 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -864073397647784829} +!578 = !{!579, !"cold"} +!579 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 
3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -621063222059245646} +!580 = !{!581, !"cold"} +!581 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -443019638236000223} +!582 = !{!583, !"cold"} +!583 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -365722648089426825} +!584 = !{!585, !"cold"} +!585 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -87465032429483344} +!586 = !{!587, !"cold"} +!587 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -85418178938284967} +!588 = !{!589, !"cold"} +!589 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257, i64 6752861855664590120, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -1631791060190603686} +!590 = !{!591, !"cold"} +!591 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, 
i64 3500755695426091485, i64 4378935957859808257, i64 7655619093809680908} +!592 = !{i64 761518489666860826, i64 -1420336805534834351, i64 -2943078617660248973, i64 3500755695426091485, i64 4378935957859808257} +!593 = !{i64 -2395126913894468764, i64 -5597465602586680838} +!594 = !{i64 8922170082039419050} +!595 = !{i64 -627092666823557924} +!596 = !{!"function_entry_count", i64 152} +!597 = !{i64 -3914526793362151039, i64 -281623063928291605} +!598 = !{i64 8550096939708590324, i64 3396817364192390559} +!599 = !{!600, !602, !604, !606, !608, !610, !612, !614, !616, !618, !620, !622, !624, !626, !628, !630, !632, !634, !636, !638, !640, !642, !644, !646, !648, !650, !652, !654, !656, !658, !660, !662, !664, !666, !668, !670, !672, !674, !676, !678, !680, !682, !684, !686, !688, !690, !692, !694, !696, !698, !700, !702, !704, !706, !708, !710, !712, !714, !716, !718, !720, !722, !724, !726, !728, !730, !732, !734, !736, !738, !740, !742, !744, !746, !748, !750, !752, !754, !756, !758, !760, !762, !764, !766} +!600 = !{!601, !"cold"} +!601 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 75847647400915869} +!602 = !{!603, !"notcold"} +!603 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 183044117821689400} +!604 = !{!605, !"cold"} +!605 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 
-281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 387225403803835478} +!606 = !{!607, !"cold"} +!607 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 794721636207329848} +!608 = !{!609, !"cold"} +!609 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 885980778253288927} +!610 = !{!611, !"cold"} +!611 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1015792291075825982} +!612 = !{!613, !"cold"} +!613 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1111950148150102502} +!614 = !{!615, !"cold"} +!615 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1180177230914639611} +!616 = !{!617, !"cold"} +!617 = !{i64 -7983807848080961559, 
i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1494295413782701148} +!618 = !{!619, !"cold"} +!619 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1659406033211294585} +!620 = !{!621, !"cold"} +!621 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1746300563922301711} +!622 = !{!623, !"cold"} +!623 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 1955855015757903991} +!624 = !{!625, !"cold"} +!625 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2061170524869170355} +!626 = !{!627, !"cold"} +!627 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, 
i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2244685673087294926} +!628 = !{!629, !"cold"} +!629 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2258506400904444929} +!630 = !{!631, !"cold"} +!631 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 2780636789936373045} +!632 = !{!633, !"cold"} +!633 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3003382690369027000} +!634 = !{!635, !"cold"} +!635 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3036500738208828614} +!636 = !{!637, !"cold"} +!637 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3377720129330375308} +!638 = !{!639, !"cold"} +!639 = !{i64 
-7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3516390910007184229} +!640 = !{!641, !"cold"} +!641 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3730017386320691583} +!642 = !{!643, !"cold"} +!643 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3863666838850843735} +!644 = !{!645, !"cold"} +!645 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 3987931074178356766} +!646 = !{!647, !"cold"} +!647 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4475965293000579642} +!648 = !{!649, !"cold"} +!649 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 
-3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 4727386874466965794} +!650 = !{!651, !"cold"} +!651 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5626241009149718631} +!652 = !{!653, !"cold"} +!653 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5642831061945076553} +!654 = !{!655, !"cold"} +!655 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 5803992361678417930} +!656 = !{!657, !"cold"} +!657 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6027979419669521677} +!658 = !{!659, !"cold"} +!659 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6042081988325512172} +!660 = !{!661, !"cold"} +!661 
= !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6431479059631025437} +!662 = !{!663, !"cold"} +!663 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6516605682960517029} +!664 = !{!665, !"cold"} +!665 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6591116691263167381} +!666 = !{!667, !"cold"} +!667 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 6616057203216317315} +!668 = !{!669, !"cold"} +!669 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7353174428633641643} +!670 = !{!671, !"cold"} +!671 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 
3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7359049424616934025} +!672 = !{!673, !"cold"} +!673 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7654798960227041366} +!674 = !{!675, !"cold"} +!675 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 7683835084651596728} +!676 = !{!677, !"cold"} +!677 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8205991499743838669} +!678 = !{!679, !"cold"} +!679 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8238108917955982391} +!680 = !{!681, !"cold"} +!681 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 8837495783078234090} +!682 
= !{!683, !"cold"} +!683 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 9194683874556492116} +!684 = !{!685, !"cold"} +!685 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8803530003105057334} +!686 = !{!687, !"cold"} +!687 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8442443030798361395} +!688 = !{!689, !"cold"} +!689 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -8058981938237464077} +!690 = !{!691, !"cold"} +!691 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7589260736264366062} +!692 = !{!693, !"cold"} +!693 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 
8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7386139261276737944} +!694 = !{!695, !"cold"} +!695 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -7185537622496035457} +!696 = !{!697, !"cold"} +!697 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6900734672072851702} +!698 = !{!699, !"cold"} +!699 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6762801909492469455} +!700 = !{!701, !"cold"} +!701 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -6724742556843950685} +!702 = !{!703, !"cold"} +!703 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, 
i64 -6409112906345845960} +!704 = !{!705, !"cold"} +!705 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5965768366903796804} +!706 = !{!707, !"cold"} +!707 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5850556812626124399} +!708 = !{!709, !"cold"} +!709 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5753111555624672216} +!710 = !{!711, !"cold"} +!711 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5726116740966450362} +!712 = !{!713, !"cold"} +!713 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5725103543964193355} +!714 = !{!715, !"cold"} +!715 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 
2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5597465602586680838} +!716 = !{!717, !"cold"} +!717 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5542310814437003522} +!718 = !{!719, !"cold"} +!719 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -5086634999161749506} +!720 = !{!721, !"cold"} +!721 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4892330537265564330} +!722 = !{!723, !"cold"} +!723 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -4597440142749257264} +!724 = !{!725, !"cold"} +!725 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, 
i64 -2395126913894468764, i64 -3770505485117883193} +!726 = !{!727, !"cold"} +!727 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3715299750555754568} +!728 = !{!729, !"cold"} +!729 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3615756978728100936} +!730 = !{!731, !"cold"} +!731 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3487944172906860150} +!732 = !{!733, !"cold"} +!733 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3343380507450452769} +!734 = !{!735, !"cold"} +!735 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3290831674468055960} +!736 = !{!737, !"cold"} +!737 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 
-2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -3257889137998070481} +!738 = !{!739, !"cold"} +!739 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2808938421088748093} +!740 = !{!741, !"cold"} +!741 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2656685852259003985} +!742 = !{!743, !"cold"} +!743 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2513421463471446585} +!744 = !{!745, !"cold"} +!745 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -2346373089490877370} +!746 = !{!747, !"cold"} +!747 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, 
i64 8922170082039419050, i64 -2395126913894468764, i64 -2188352265869670772} +!748 = !{!749, !"cold"} +!749 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1875947641213642418} +!750 = !{!751, !"cold"} +!751 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1820129202923097948} +!752 = !{!753, !"cold"} +!753 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1152657617489033068} +!754 = !{!755, !"cold"} +!755 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1141827363237952836} +!756 = !{!757, !"cold"} +!757 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -1045863445022014911} +!758 = !{!759, !"cold"} +!759 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 
398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -443019638236000223} +!760 = !{!761, !"cold"} +!761 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -365722648089426825} +!762 = !{!763, !"cold"} +!763 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -290581884459589510} +!764 = !{!765, !"cold"} +!765 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -2395126913894468764, i64 -128397300321256467} +!766 = !{!767, !"cold"} +!767 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908, i64 8550096939708590324, i64 3396817364192390559, i64 -3914526793362151039, i64 -281623063928291605, i64 -627092666823557924, i64 8922170082039419050, i64 -1631791060190603686} +!768 = !{i64 -7983807848080961559, i64 -8788491622811487171, i64 398112774497603563, i64 -2582828665053534093, i64 2078318003217690908} +!769 = !{i64 -2395126913894468764, i64 2258506400904444929} +!770 = !{i64 8287424466766266086} diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll index 990a4a4..151ebf3 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes.ll @@ -4,6 +4,9 @@ ;; The code is similar to that of basic.ll, but with a second allocation. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +;; Disable merge iteration for now as it causes spurious diffs due to different +;; iteration order (but the same ultimate hinting of the contexts). +; RUN: -memprof-merge-iteration=false \ ; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \ ; RUN: --check-prefix=IR --check-prefix=DUMP diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll index 80f6bc7..5968c3f 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/mergenodes2.ll @@ -6,6 +6,9 @@ ;; share the merged nodes when possible. ; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \ +;; Disable merge iteration for now as it causes spurious diffs due to different +;; iteration order (but the same ultimate hinting of the contexts). 
+; RUN: -memprof-merge-iteration=false \ ; RUN: -memprof-verify-ccg -memprof-dump-ccg %s -S 2>&1 | FileCheck %s \ ; RUN: --check-prefix=IR --check-prefix=DUMP diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll index c98e7d3..482907d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll @@ -8,12 +8,12 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: ptr noalias noundef readonly captures(none) [[TMP0:%.*]], ptr noalias noundef writeonly captures(none) [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28 ; CHECK-NEXT: br label [[TMP4:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ] ; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], i64 [[TMP3]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 -28 ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5) ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> diff --git a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll index d313164..1a18526 100644 --- a/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll +++ b/llvm/test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt 
< %s -passes=reassociate -S | FileCheck %s --check-prefixes=CHECK,CHECK-CV +; RUN: opt < %s -passes=reassociate -S -use-constant-int-for-fixed-length-splat=false | FileCheck %s --check-prefixes=CHECK,CHECK-CV ; RUN: opt < %s -passes=reassociate -S -use-constant-int-for-fixed-length-splat | FileCheck %s --check-prefixes=CHECK,CHECK-CI ; Check that a*c+b*c is turned into (a+b)*c diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll index b309682..2fad306c5 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll @@ -47,10 +47,8 @@ define void @testDisjointOrSplits(ptr %p) { ; CHECK-LABEL: define void @testDisjointOrSplits( ; CHECK-SAME: ptr [[P:%.*]]) { ; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo() -; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10 -; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P]], i64 [[VAR]] +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 10 ; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll index b0e88ef..a6b38bc 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-sub.ll @@ -31,11 +31,9 @@ define void @test_A_sub_B_add_ConstantInt(ptr %p) { ; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[REM]] to i64 ; CHECK-NEXT: [[SUB22:%.*]] = sub i64 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[TMP4:%.*]] = 
shl i64 [[SUB22]], 2 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2044 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 2044 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[UGLYGEP]], i64 [[TMP4]] ; CHECK-NEXT: store float 1.000000e+00, ptr [[TMP7]], align 4 ; CHECK-NEXT: br label [[COND_END]] ; CHECK: cond.end: diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index 143cc38..2462e00 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -18,7 +18,17 @@ from lit.llvm.subst import ToolSubst config.name = "LLVM" # testFormat: The test format to use to interpret tests. -config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) +extra_substitutions = extra_substitutions = ( + [ + (r"\| not FileCheck .*", "> /dev/null"), + (r"\| FileCheck .*", "> /dev/null"), + ] + if config.enable_profcheck + else [] +) +config.test_format = lit.formats.ShTest( + not llvm_config.use_lit_shell, extra_substitutions +) # suffixes: A list of file extensions to treat as test files. This is overriden # by individual lit.local.cfg files in the test subdirectories. @@ -278,6 +288,7 @@ tools.extend( ] ) + # Find (major, minor) version of ptxas def ptxas_version(ptxas): ptxas_cmd = subprocess.Popen([ptxas, "--version"], stdout=subprocess.PIPE) @@ -451,7 +462,7 @@ if config.link_llvm_dylib: "%llvmdylib", "{}/libLLVM{}.{}".format( config.llvm_shlib_dir, config.llvm_shlib_ext, config.llvm_dylib_version - ) + ), ) ) @@ -582,6 +593,7 @@ def have_ld64_plugin_support(): if have_ld64_plugin_support(): config.available_features.add("ld64_plugin") + def host_unwind_supports_jit(): # Do we expect the host machine to support JIT registration of clang's # default unwind info format for the host (e.g. 
eh-frames, compact-unwind, @@ -589,7 +601,7 @@ def host_unwind_supports_jit(): # Linux and the BSDs use DWARF eh-frames and all known unwinders support # register_frame at minimum. - if platform.system() in [ "Linux", "FreeBSD", "NetBSD" ]: + if platform.system() in ["Linux", "FreeBSD", "NetBSD"]: return True # Windows does not support frame info without the ORC runtime. @@ -601,11 +613,7 @@ def host_unwind_supports_jit(): # compact-unwind only, and JIT'd registration is not available before # macOS 14.0. if platform.system() == "Darwin": - - assert ( - "arm64" in config.host_triple - or "x86_64" in config.host_triple - ) + assert "arm64" in config.host_triple or "x86_64" in config.host_triple if "x86_64" in config.host_triple: return True @@ -627,6 +635,7 @@ def host_unwind_supports_jit(): return False + if host_unwind_supports_jit(): config.available_features.add("host-unwind-supports-jit") diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test index 6368451..bd1c57d 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/amdgpu-no-merge-comments.test @@ -3,6 +3,7 @@ # Replace llc with cat, so we can simulate hypothetical output without actually running llc. # Copy the simulated output to the temporary directory. 
-# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments-O0.s %T/ && cp -f %S/Inputs/amdgpu_no_merge_comments-O3.s %T/ -# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments.ll %t.ll && %update_llc_test_checks --llc-binary cat %t.ll -# RUN: diff -u %S/Inputs/amdgpu_no_merge_comments.ll.expected %t.ll +# RUN: mkdir -p %t.dir +# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments-O0.s %t.dir/ && cp -f %S/Inputs/amdgpu_no_merge_comments-O3.s %t.dir/ +# RUN: cp -f %S/Inputs/amdgpu_no_merge_comments.ll %t.dir/file.ll && %update_llc_test_checks --llc-binary cat %t.dir/file.ll +# RUN: diff -u %S/Inputs/amdgpu_no_merge_comments.ll.expected %t.dir/file.ll diff --git a/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test b/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test index bc59414..70a263b 100644 --- a/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test +++ b/llvm/test/tools/dsymutil/X86/swift-ast-x86_64.test @@ -1,12 +1,13 @@ -RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL -RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL -RUN: llvm-readobj --sections --section-data %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ -RUN: llvm-dwarfdump --show-section-sizes %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP +RUN: rm -rf %t && mkdir %t +RUN: dsymutil -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL +RUN: dsymutil -oso-prepend-path %p/.. 
%p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL +RUN: llvm-readobj --sections --section-data %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ +RUN: llvm-dwarfdump --show-section-sizes %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP -RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL -RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %T/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL -RUN: llvm-readobj --sections --section-data %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ -RUN: llvm-dwarfdump --show-section-sizes %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP +RUN: dsymutil --linker parallel -oso-prepend-path %p/.. %p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose -no-swiftmodule-timestamp | FileCheck %s --check-prefix=DSYMUTIL +RUN: dsymutil --linker parallel -oso-prepend-path %p/.. 
%p/../Inputs/swift-ast.macho.x86_64 -o %t/swift-ast.dSYM -verbose | FileCheck %s --check-prefix=DSYMUTIL +RUN: llvm-readobj --sections --section-data %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=READOBJ +RUN: llvm-dwarfdump --show-section-sizes %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=DWARFDUMP The tested object file has been created by the dummy Swift code: @@ -27,5 +28,5 @@ READOBJ-NEXT: |.| DWARFDUMP: __swift_ast -RUN: dsymutil -s %T/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=NAST +RUN: dsymutil -s %t/swift-ast.dSYM/Contents/Resources/DWARF/swift-ast.macho.x86_64 | FileCheck %s --check-prefix=NAST NAST-NOT: N_AST diff --git a/llvm/test/tools/llvm-dwarfdump/dump_dwo.test b/llvm/test/tools/llvm-dwarfdump/dump_dwo.test index 7405955..c5f22a2 100644 --- a/llvm/test/tools/llvm-dwarfdump/dump_dwo.test +++ b/llvm/test/tools/llvm-dwarfdump/dump_dwo.test @@ -1,9 +1,10 @@ ## Check that llvm-dwarfdump works when dumping .o files with .dwo files. 
-# RUN: yaml2obj %p/Inputs/dump_dwo.o.yaml -o %T/dump_dwo.o -# RUN: yaml2obj %p/Inputs/dump_dwo.dwo.yaml -o %T/dump_dwo.dwo +# RUN: mkdir -p %t.dir +# RUN: yaml2obj %p/Inputs/dump_dwo.o.yaml -o %t.dir/dump_dwo.o +# RUN: yaml2obj %p/Inputs/dump_dwo.dwo.yaml -o %t.dir/dump_dwo.dwo ## We need to be in the same directory so we can find .dwo file -# RUN: cd %T +# RUN: cd %t.dir # RUN: llvm-dwarfdump --dwo dump_dwo.o | FileCheck %s # RUN: llvm-dwarfdump --dwo --name int --name char dump_dwo.o | FileCheck %s --check-prefix=NAMES # RUN: llvm-dwarfdump --name int --name char dump_dwo.o | FileCheck %s --check-prefix=NAMES_NO_DWO diff --git a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test index 43a88f5..e8a5885 100644 --- a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test +++ b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test @@ -1,112 +1,113 @@ ## This test checks that -l and -L options work correctly. -# RUN: yaml2obj %S/Inputs/input1.yaml -o %t-input1.o -# RUN: yaml2obj %S/Inputs/input2.yaml -o %t-input2.o +# RUN: mkdir -p %t.dir +# RUN: yaml2obj %S/Inputs/input1.yaml -o %t.dir/input1.o +# RUN: yaml2obj %S/Inputs/input2.yaml -o %t.dir/input2.o ## Check that the library is recognised when it ends with '.o': -# RUN: llvm-libtool-darwin -static -o %t.lib -l%basename_t.tmp-input1.o -l%basename_t.tmp-input2.o -L%T +# RUN: llvm-libtool-darwin -static -o %t.lib -linput1.o -linput2.o -L%t.dir # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS --match-full-lines -# CHECK-NAMES: [[PREFIX]]-input1.o -# CHECK-NAMES-NEXT: [[PREFIX]]-input2.o +# CHECK-NAMES: input1.o +# CHECK-NAMES-NEXT: input2.o # 
CHECK-SYMBOLS: Archive map -# CHECK-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o -# CHECK-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o +# CHECK-SYMBOLS-NEXT: _symbol1 in input1.o +# CHECK-SYMBOLS-NEXT: _symbol2 in input2.o # CHECK-SYMBOLS-EMPTY: ## Check that the library is recognised when prepended with 'lib' and appended with '.a': # RUN: rm -rf %t/dirname && mkdir -p %t/dirname -# RUN: llvm-ar cr %t/dirname/libinput2.a %t-input2.o +# RUN: llvm-ar cr %t/dirname/libinput2.a %t.dir/input2.o # RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -L%t/dirname # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS --match-full-lines -# SINGLE-NAMES: [[PREFIX]]-input2.o +# SINGLE-NAMES: input2.o # SINGLE-SYMBOLS: Archive map -# SINGLE-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o +# SINGLE-SYMBOLS-NEXT: _symbol2 in input2.o # SINGLE-SYMBOLS-EMPTY: ## -l and -L option specified multiple times: # RUN: rm -rf %t/otherDirname && mkdir -p %t/otherDirname -# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t-input1.o +# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t.dir/input1.o # RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -linput1 -L%t/dirname -L%t/otherDirname # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS --match-full-lines ## Check it is possible to pass arguments to -l and -L separated from the option 
## and the options specified multiple times: # RUN: rm -rf %t/otherDirname && mkdir -p %t/otherDirname -# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t-input1.o +# RUN: llvm-ar cr %t/otherDirname/libinput1.a %t.dir/input1.o # RUN: llvm-libtool-darwin -static -o %t.lib -l input2 -l input1 -L %t/dirname -L %t/otherDirname # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=OTHER-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=OTHER-SYMBOLS --match-full-lines -# OTHER-NAMES: [[PREFIX]]-input2.o -# OTHER-NAMES-NEXT: [[PREFIX]]-input1.o +# OTHER-NAMES: input2.o +# OTHER-NAMES-NEXT: input1.o # OTHER-SYMBOLS: Archive map -# OTHER-SYMBOLS-NEXT: _symbol2 in [[PREFIX]]-input2.o -# OTHER-SYMBOLS-NEXT: _symbol1 in [[PREFIX]]-input1.o +# OTHER-SYMBOLS-NEXT: _symbol2 in input2.o +# OTHER-SYMBOLS-NEXT: _symbol1 in input1.o # OTHER-SYMBOLS-EMPTY: ## Check that if multiple directories specified with -L have the same named file ## in them, the file from the first directory is selected. -# RUN: llvm-ar cr %t/otherDirname/libinput2.a %t-input1.o +# RUN: llvm-ar cr %t/otherDirname/libinput2.a %t.dir/input1.o # RUN: llvm-libtool-darwin -static -o %t.lib -linput2 -L%t/dirname -L%t/otherDirname # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=SINGLE-SYMBOLS --match-full-lines ## Check that if two different files with the same names are explicitly ## specified, the command gives a warning. 
-# RUN: cp %t-input2.o %t/dirname +# RUN: cp %t.dir/input2.o %t/dirname # RUN: llvm-libtool-darwin -static -o %t.lib \ -# RUN: %t/dirname/%basename_t.tmp-input2.o %t-input2.o 2>&1 | \ +# RUN: %t/dirname/input2.o %t.dir/input2.o 2>&1 | \ # RUN: FileCheck %s --check-prefix=DUPLICATE-INPUT \ -# RUN: -DFILE=%basename_t.tmp-input2.o \ -# RUN: -DINPUTA=%t/dirname/%basename_t.tmp-input2.o \ -# RUN: -DINPUTB=%t-input2.o +# RUN: -DFILE=input2.o \ +# RUN: -DINPUTA=%t/dirname/input2.o \ +# RUN: -DINPUTB=%t.dir/input2.o # DUPLICATE-INPUT: warning: file '[[FILE]]' was specified multiple times. # DUPLICATE-INPUT-DAG: [[INPUTA]] # DUPLICATE-INPUT-DAG: [[INPUTB]] ## -l option combined with an input file: -# RUN: llvm-libtool-darwin -static -o %t.lib %t-input1.o -linput2 -L%t/dirname +# RUN: llvm-libtool-darwin -static -o %t.lib %t.dir/input1.o -linput2 -L%t/dirname # RUN: llvm-ar t %t.lib | \ -# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp +# RUN: FileCheck %s --check-prefix=CHECK-NAMES --implicit-check-not={{.}} # RUN: llvm-nm --print-armap %t.lib | \ -# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS -DPREFIX=%basename_t.tmp --match-full-lines +# RUN: FileCheck %s --check-prefix=CHECK-SYMBOLS --match-full-lines ## Specify the same file with a -l option and an input file: # RUN: rm -rf %t/copy # RUN: mkdir -p %t/copy -# RUN: cp %t-input1.o %t/copy +# RUN: cp %t.dir/input1.o %t/copy # RUN: llvm-libtool-darwin -static -o %t.lib \ -# RUN: %t/copy/%basename_t.tmp-input1.o -l%basename_t.tmp-input1.o -L%t/copy 2>&1 | \ -# RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT -DFILE=%basename_t.tmp-input1.o +# RUN: %t/copy/input1.o -linput1.o -L%t/copy 2>&1 | \ +# RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT -DFILE=input1.o ## Specify same -l option twice: -# RUN: llvm-libtool-darwin -static -o %t.lib -l%basename_t.tmp-input1.o \ -# RUN: -l%basename_t.tmp-input1.o -L%t/copy 2>&1 | \ +# RUN: llvm-libtool-darwin -static -o %t.lib 
-linput1.o \ +# RUN: -linput1.o -L%t/copy 2>&1 | \ # RUN: FileCheck %s --check-prefix=DUPLICATE-L-INPUT \ -# RUN: -DFILE=%basename_t.tmp-input1.o +# RUN: -DFILE=input1.o # DUPLICATE-L-INPUT: warning: file '[[FILE]]' was specified multiple times. @@ -123,11 +124,11 @@ ## Check that an error is thrown when the input library cannot be found ## (since 'lib' and '.a' are added): -# RUN: llvm-ar cr %t/dirname/file-does-exist %t-input1.o +# RUN: llvm-ar cr %t/dirname/file-does-exist %t.dir/input1.o # RUN: not llvm-libtool-darwin -static -o %t.lib -lfile-does-exist -L%t/dirname 2>&1 | \ # RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=libfile-does-exist.a -# RUN: llvm-ar cr %t/dirname/libfile-does-exist.a %t-input1.o +# RUN: llvm-ar cr %t/dirname/libfile-does-exist.a %t.dir/input1.o # RUN: not llvm-libtool-darwin -static -o %t.lib -llibfile-does-exist.a -L%t/dirname 2>&1 | \ # RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=liblibfile-does-exist.a.a @@ -145,13 +146,13 @@ # RUN: FileCheck %s --check-prefix=NOT-VALID -DFILE=libnot-valid.a ## Check that 'lib' and '.a' are not added to a file ending in '.o': -# RUN: llvm-ar cr %t/dirname/libfoo.o.a %t-input1.o +# RUN: llvm-ar cr %t/dirname/libfoo.o.a %t.dir/input1.o # RUN: not llvm-libtool-darwin -static -o %t.lib -lfoo.o -L%t/dirname 2>&1 | \ # RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=foo.o ## Check that 'lib' and '.a' are added to a file ending in any other extension ## beside '.o' (e.g. 
'.ext'): -# RUN: llvm-ar cr %t/dirname/libbar.ext.a %t-input2.o +# RUN: llvm-ar cr %t/dirname/libbar.ext.a %t.dir/input2.o # RUN: llvm-libtool-darwin -static -o %t.lib -lbar.ext -L%t/dirname # RUN: llvm-ar t %t.lib | \ # RUN: FileCheck %s --check-prefix=SINGLE-NAMES --implicit-check-not={{.}} -DPREFIX=%basename_t.tmp diff --git a/llvm/test/tools/llvm-objcopy/COFF/dump-section.test b/llvm/test/tools/llvm-objcopy/COFF/dump-section.test index 591135a..e96febc 100644 --- a/llvm/test/tools/llvm-objcopy/COFF/dump-section.test +++ b/llvm/test/tools/llvm-objcopy/COFF/dump-section.test @@ -4,7 +4,7 @@ # RUN: llvm-objcopy --dump-section .text.f=%t.txt %t.obj # RUN: od -t x1 %t.txt | FileCheck %s --ignore-case -check-prefix CHECK-TEXT-F # RUN: not llvm-objcopy --dump-section non-existent=/dev/null %t.obj 2>&1 | FileCheck %s -check-prefix CHECK-NO-SECTION -# RUN: not llvm-objcopy --dump-section .text=%T %t.obj 2>&1 | FileCheck -DOBJ=%t.obj -DMSG=%errc_EISDIR %s -check-prefix CHECK-INVALID-DESTINATION +# RUN: not llvm-objcopy --dump-section .text=%S %t.obj 2>&1 | FileCheck -DOBJ=%t.obj -DMSG=%errc_EISDIR %s -check-prefix CHECK-INVALID-DESTINATION # CHECK-EMPTY-SIZE: 0 diff --git a/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll b/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll new file mode 100644 index 0000000..d9ed9df --- /dev/null +++ b/llvm/test/tools/llvm-reduce/operands-to-args-lifetimes.ll @@ -0,0 +1,18 @@ +; RUN: llvm-reduce %s -o %t --abort-on-invalid-reduction --delta-passes=operands-to-args --test FileCheck --test-arg %s --test-arg --check-prefix=INTERESTING --test-arg --input-file +; RUN: FileCheck %s --input-file %t --check-prefix=REDUCED + +; INTERESTING: store +; REDUCED: define void @test(ptr %a) { +; REDUCED-NEXT: %a1 = alloca i32 +; REDUCED-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr %a1) +; REDUCED-NEXT: store i32 0, ptr %a +; REDUCED-NEXT: store i32 1, ptr %a +; REDUCED-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr %a1) +define 
void @test() { + %a = alloca i32 + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + store i32 0, ptr %a + store i32 1, ptr %a + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + ret void +} diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index c322b4f..875ec1b 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -1084,11 +1084,15 @@ int runOrcJIT(const char *ProgName) { // If this is a Mingw or Cygwin executor then we need to alias __main to // orc_rt_int_void_return_0. - if (J->getTargetTriple().isOSCygMing()) - ExitOnErr(J->getProcessSymbolsJITDylib()->define( + if (J->getTargetTriple().isOSCygMing()) { + auto &WorkaroundJD = J->getProcessSymbolsJITDylib() + ? *J->getProcessSymbolsJITDylib() + : J->getMainJITDylib(); + ExitOnErr(WorkaroundJD.define( orc::absoluteSymbols({{J->mangleAndIntern("__main"), {orc::ExecutorAddr::fromPtr(mingw_noop_main), JITSymbolFlags::Exported}}}))); + } // Regular modules are greedy: They materialize as a whole and trigger // materialization for all required symbols recursively. Lazy modules go diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp index 1865a59..864f695 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -49,6 +50,10 @@ static bool canReduceUse(Use &Op) { if (&CI->getCalledOperandUse() == &Op) return false; + // lifetime.start/lifetime.end require alloca argument. 
+ if (isa<LifetimeIntrinsic>(Op.getUser())) + return false; + return true; } diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 7a5fd83..9609e8e 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -5540,6 +5540,287 @@ TEST(APFloatTest, PPCDoubleDoubleFrexp) { EXPECT_EQ(0x3c98000000000000ull, Result.bitcastToAPInt().getRawData()[1]); } +TEST(APFloatTest, PPCDoubleDoubleNext) { + auto NextUp = [](APFloat X) { + X.next(/*nextDown=*/false); + return X; + }; + + auto NextDown = [](APFloat X) { + X.next(/*nextDown=*/true); + return X; + }; + + auto Zero = [] { + return APFloat::getZero(APFloat::IEEEdouble()); + }; + + auto One = [] { + return APFloat::getOne(APFloat::IEEEdouble()); + }; + + // 0x1p-1074 + auto MinSubnormal = [] { + return APFloat::getSmallest(APFloat::IEEEdouble()); + }; + + // 2^-52 + auto Eps = [&] { + const fltSemantics &Sem = APFloat::IEEEdouble(); + return scalbn(One(), 1 - APFloat::semanticsPrecision(Sem), + APFloat::rmNearestTiesToEven); + }; + + // 2^-53 + auto EpsNeg = [&] { return scalbn(Eps(), -1, APFloat::rmNearestTiesToEven); }; + + auto MakeDoubleAPFloat = [](auto Hi, auto Lo) { + APFloat HiFloat{APFloat::IEEEdouble(), APFloat::uninitialized}; + if constexpr (std::is_same_v<decltype(Hi), APFloat>) { + HiFloat = Hi; + } else { + HiFloat = {APFloat::IEEEdouble(), Hi}; + } + + APFloat LoFloat{APFloat::IEEEdouble(), APFloat::uninitialized}; + if constexpr (std::is_same_v<decltype(Lo), APFloat>) { + LoFloat = Lo; + } else { + LoFloat = {APFloat::IEEEdouble(), Lo}; + } + + APInt Bits = LoFloat.bitcastToAPInt().concat(HiFloat.bitcastToAPInt()); + return APFloat(APFloat::PPCDoubleDouble(), Bits); + }; + APFloat Test(APFloat::PPCDoubleDouble(), APFloat::uninitialized); + APFloat Expected(APFloat::PPCDoubleDouble(), APFloat::uninitialized); + + // 1. Test Special Cases Values. + // + // Test all special values for nextUp and nextDown prescribed by IEEE-754R + // 2008. 
These are: + // 1. +inf + // 2. -inf + // 3. getLargest() + // 4. -getLargest() + // 5. getSmallest() + // 6. -getSmallest() + // 7. qNaN + // 8. sNaN + // 9. +0 + // 10. -0 + + // nextUp(+inf) = +inf. + Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.isPosInfinity()); + EXPECT_TRUE(!Test.isNegative()); + + // nextDown(+inf) = -nextUp(-inf) = -(-getLargest()) = getLargest() + Test = APFloat::getInf(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_FALSE(Test.isNegative()); + EXPECT_TRUE(Test.isLargest()); + + // nextUp(-inf) = -getLargest() + Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true); + Expected = APFloat::getLargest(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.isNegative()); + EXPECT_TRUE(Test.isLargest()); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // nextDown(-inf) = -nextUp(+inf) = -(+inf) = -inf. + Test = APFloat::getInf(APFloat::PPCDoubleDouble(), true); + Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_TRUE(Test.isNegInfinity()); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // nextUp(getLargest()) = +inf + Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), false); + Expected = APFloat::getInf(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.isPosInfinity()); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // nextUp(-getSmallest()) = -0. + Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/true); + Expected = APFloat::getZero(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.isNegZero()); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // nextDown(getSmallest()) = -nextUp(-getSmallest()) = -(-0) = +0. 
+ Test = APFloat::getSmallest(Test.getSemantics(), /*Neg=*/false); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_TRUE(Test.isPosZero()); + + // nextDown(-getLargest()) = -nextUp(getLargest()) = -(inf) = -inf. + Test = APFloat::getLargest(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_TRUE(Test.isNegInfinity()); + + // nextUp(qNaN) = qNaN + Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.isNaN()); + EXPECT_FALSE(Test.isSignaling()); + + // nextDown(qNaN) = qNaN + Test = APFloat::getQNaN(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_TRUE(Test.isNaN()); + EXPECT_FALSE(Test.isSignaling()); + + // nextUp(sNaN) = qNaN + Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(false), APFloat::opInvalidOp); + EXPECT_TRUE(Test.isNaN()); + EXPECT_FALSE(Test.isSignaling()); + + // nextDown(sNaN) = qNaN + Test = APFloat::getSNaN(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(true), APFloat::opInvalidOp); + EXPECT_TRUE(Test.isNaN()); + EXPECT_FALSE(Test.isSignaling()); + + // nextUp(+0) = +getSmallest() + Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_FALSE(Test.isNegative()); + EXPECT_TRUE(Test.isSmallest()); + + // nextDown(+0) = -nextUp(-0) = -getSmallest() + Test = APFloat::getZero(APFloat::PPCDoubleDouble(), false); + EXPECT_EQ(Test.next(true), APFloat::opOK); + EXPECT_TRUE(Test.isNegative()); + EXPECT_TRUE(Test.isSmallest()); + + // nextUp(-0) = +getSmallest() + Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_FALSE(Test.isNegative()); + EXPECT_TRUE(Test.isSmallest()); + + // nextDown(-0) = -nextUp(0) = -getSmallest() + Test = APFloat::getZero(APFloat::PPCDoubleDouble(), true); + EXPECT_EQ(Test.next(true), APFloat::opOK); + 
EXPECT_TRUE(Test.isNegative()); + EXPECT_TRUE(Test.isSmallest()); + + // 2. Cases where the lo APFloat is zero. + + // 2a. |hi| < 2*DBL_MIN_NORMAL (DD precision == D precision) + Test = APFloat(APFloat::PPCDoubleDouble(), "0x1.fffffffffffffp-1022"); + Expected = APFloat(APFloat::PPCDoubleDouble(), "0x1p-1021"); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); + + // 2b. |hi| >= 2*DBL_MIN_NORMAL (DD precision > D precision) + // Test at hi = 1.0, lo = 0. + Test = MakeDoubleAPFloat(One(), Zero()); + Expected = MakeDoubleAPFloat(One(), MinSubnormal()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // Test at hi = -1.0. delta = 2^-1074 (positive, moving towards +Inf). + Test = MakeDoubleAPFloat(-One(), Zero()); + Expected = MakeDoubleAPFloat(-One(), MinSubnormal()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // Testing the boundary where calculated delta equals DBL_TRUE_MIN. + // Requires ilogb(hi) = E = -968. + // delta = 2^(-968 - 106) = 2^-1074 = DBL_TRUE_MIN. + Test = MakeDoubleAPFloat("0x1p-968", Zero()); + Expected = MakeDoubleAPFloat("0x1p-968", MinSubnormal()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // Testing below the boundary (E < -968). Delta clamps to DBL_TRUE_MIN. + Test = MakeDoubleAPFloat("0x1p-969", Zero()); + Expected = MakeDoubleAPFloat("0x1p-969", MinSubnormal()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // 3. Standard Increment (No rollover) + // hi=1.0, lo=2^-1074. + Test = MakeDoubleAPFloat(One(), MinSubnormal()); + Expected = MakeDoubleAPFloat(One(), NextUp(MinSubnormal())); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // Incrementing negative lo. 
+ Test = MakeDoubleAPFloat(One(), -MinSubnormal()); + Expected = MakeDoubleAPFloat(One(), Zero()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); + + // Crossing lo=0. + Test = MakeDoubleAPFloat(One(), -MinSubnormal()); + Expected = MakeDoubleAPFloat(One(), Zero()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_EQ(Test.compare(Expected), APFloat::cmpEqual); + + // 4. Rollover Cases around 1.0 (Positive hi) + // hi=1.0, lo=nextDown(2^-53). + Test = MakeDoubleAPFloat(One(), NextDown(EpsNeg())); + EXPECT_FALSE(Test.isDenormal()); + Expected = MakeDoubleAPFloat(One(), EpsNeg()); + EXPECT_FALSE(Test.isDenormal()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + + // Input: (1, ulp(1)/2). nextUp(lo)=next(H). V>Midpoint. Rollover occurs + // Can't naively increment lo: + // RTNE(0x1p+0 + 0x1.0000000000001p-53) == 0x1.0000000000001p+0. + // Can't naively TwoSum(0x1p+0, nextUp(0x1p-53)): + // It gives {nextUp(0x1p+0), nextUp(nextUp(-0x1p-53))} but the next + // number should be {nextUp(0x1p+0), nextUp(-0x1p-53)}. + Test = MakeDoubleAPFloat(One(), EpsNeg()); + EXPECT_FALSE(Test.isDenormal()); + Expected = MakeDoubleAPFloat(NextUp(One()), NextUp(-EpsNeg())); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + EXPECT_FALSE(Test.isDenormal()); + + // hi = nextDown(1), lo = nextDown(0x1p-54) + Test = MakeDoubleAPFloat(NextDown(One()), NextDown(APFloat(0x1p-54))); + EXPECT_FALSE(Test.isDenormal()); + Expected = MakeDoubleAPFloat(One(), APFloat(-0x1p-54)); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + EXPECT_FALSE(Test.isDenormal()); + + // 5. 
Negative Rollover (Moving towards Zero / +Inf) + + // hi = -1, lo = nextDown(0x1p-54) + Test = MakeDoubleAPFloat(APFloat(-1.0), NextDown(APFloat(0x1p-54))); + EXPECT_FALSE(Test.isDenormal()); + Expected = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + EXPECT_FALSE(Test.isDenormal()); + + // hi = -1, lo = 0x1p-54 + Test = MakeDoubleAPFloat(APFloat(-1.0), APFloat(0x1p-54)); + EXPECT_FALSE(Test.isDenormal()); + Expected = + MakeDoubleAPFloat(NextUp(APFloat(-1.0)), NextUp(APFloat(-0x1p-54))); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + EXPECT_FALSE(Test.isDenormal()); + + // 6. Rollover across Power of 2 boundary (Exponent change) + Test = MakeDoubleAPFloat(NextDown(APFloat(2.0)), NextDown(EpsNeg())); + EXPECT_FALSE(Test.isDenormal()); + Expected = MakeDoubleAPFloat(APFloat(2.0), -EpsNeg()); + EXPECT_EQ(Test.next(false), APFloat::opOK); + EXPECT_TRUE(Test.bitwiseIsEqual(Expected)); + EXPECT_FALSE(Test.isDenormal()); +} + TEST(APFloatTest, x87Largest) { APFloat MaxX87Val = APFloat::getLargest(APFloat::x87DoubleExtended()); EXPECT_TRUE(MaxX87Val.isLargest()); diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index 4e0bf38..16b9979 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -859,3 +859,35 @@ TEST_F(SelectionDAGPatternMatchTest, MatchZeroOneAllOnes) { EXPECT_TRUE(sd_match(Vec, DAG.get(), m_AllOnes(true))); } } + +TEST_F(SelectionDAGPatternMatchTest, MatchSelectCCLike) { + using namespace SDPatternMatch; + + SDValue LHS = DAG->getConstant(1, SDLoc(), MVT::i32); + SDValue RHS = DAG->getConstant(2, SDLoc(), MVT::i32); + SDValue TVal = DAG->getConstant(3, SDLoc(), MVT::i32); + SDValue FVal = DAG->getConstant(4, SDLoc(), MVT::i32); + SDValue Select = 
DAG->getNode(ISD::SELECT_CC, SDLoc(), MVT::i32, LHS, RHS, + TVal, FVal, DAG->getCondCode(ISD::SETLT)); + + ISD::CondCode CC = ISD::SETLT; + EXPECT_TRUE(sd_match( + Select, m_SelectCCLike(m_Specific(LHS), m_Specific(RHS), m_Specific(TVal), + m_Specific(FVal), m_CondCode(CC)))); +} + +TEST_F(SelectionDAGPatternMatchTest, MatchSelectCC) { + using namespace SDPatternMatch; + + SDValue LHS = DAG->getConstant(1, SDLoc(), MVT::i32); + SDValue RHS = DAG->getConstant(2, SDLoc(), MVT::i32); + SDValue TVal = DAG->getConstant(3, SDLoc(), MVT::i32); + SDValue FVal = DAG->getConstant(4, SDLoc(), MVT::i32); + SDValue Select = DAG->getNode(ISD::SELECT_CC, SDLoc(), MVT::i32, LHS, RHS, + TVal, FVal, DAG->getCondCode(ISD::SETLT)); + + ISD::CondCode CC = ISD::SETLT; + EXPECT_TRUE(sd_match(Select, m_SelectCC(m_Specific(LHS), m_Specific(RHS), + m_Specific(TVal), m_Specific(FVal), + m_CondCode(CC)))); +} diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index 3df4107..ad06ca9 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -28,6 +28,9 @@ #include <fstream> #include <stdlib.h> #include <string> +#if HAVE_UNISTD_H +#include <unistd.h> +#endif using namespace llvm; using llvm::unittest::TempDir; @@ -834,14 +837,23 @@ TEST(CommandLineTest, DefaultOptions) { } TEST(CommandLineTest, ArgumentLimit) { - std::string args(32 * 4096, 'a'); - EXPECT_FALSE(llvm::sys::commandLineFitsWithinSystemLimits("cl", args.data())); +#if HAVE_UNISTD_H && defined(_SC_ARG_MAX) + if (sysconf(_SC_ARG_MAX) != -1) { +#endif + std::string args(32 * 4096, 'a'); + EXPECT_FALSE( + llvm::sys::commandLineFitsWithinSystemLimits("cl", args.data())); +#if HAVE_UNISTD_H && defined(_SC_ARG_MAX) + } +#endif std::string args2(256, 'a'); EXPECT_TRUE(llvm::sys::commandLineFitsWithinSystemLimits("cl", args2.data())); } TEST(CommandLineTest, ArgumentLimitWindows) { - if (!Triple(sys::getProcessTriple()).isOSWindows()) + 
Triple processTriple(sys::getProcessTriple()); + if (!processTriple.isOSWindows() || + processTriple.isWindowsCygwinEnvironment()) GTEST_SKIP(); // We use 32000 as a limit for command line length. Program name ('cl'), // separating spaces and termination null character occupy 5 symbols. @@ -854,7 +866,9 @@ TEST(CommandLineTest, ArgumentLimitWindows) { } TEST(CommandLineTest, ResponseFileWindows) { - if (!Triple(sys::getProcessTriple()).isOSWindows()) + Triple processTriple(sys::getProcessTriple()); + if (!processTriple.isOSWindows() || + processTriple.isWindowsCygwinEnvironment()) GTEST_SKIP(); StackOption<std::string, cl::list<std::string>> InputFilenames( diff --git a/llvm/unittests/Support/DebugLogTest.cpp b/llvm/unittests/Support/DebugLogTest.cpp index 0c464c1..c24d1a5 100644 --- a/llvm/unittests/Support/DebugLogTest.cpp +++ b/llvm/unittests/Support/DebugLogTest.cpp @@ -121,7 +121,7 @@ TEST(DebugLogTest, StreamPrefix) { EXPECT_EQ(os.str(), expected); } // After destructors, there was a pending newline for stream B. 
- EXPECT_EQ(os.str(), expected + "\nPrefixB \n"); + EXPECT_EQ(os.str(), expected + "PrefixB "); } #else TEST(DebugLogTest, Basic) { diff --git a/llvm/unittests/Support/DynamicLibrary/PipSqueak.h b/llvm/unittests/Support/DynamicLibrary/PipSqueak.h index 3eac1e0..dc069ca 100644 --- a/llvm/unittests/Support/DynamicLibrary/PipSqueak.h +++ b/llvm/unittests/Support/DynamicLibrary/PipSqueak.h @@ -22,7 +22,7 @@ #include <vector> #endif -#ifdef _WIN32 +#if defined(_WIN32) || defined(__CYGWIN__) #define PIPSQUEAK_EXPORT __declspec(dllexport) #elif defined(__MVS__) #define PIPSQUEAK_EXPORT __attribute__((__visibility__("default"))) diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp index fc3ccea..6228de8 100644 --- a/llvm/unittests/Support/VirtualFileSystemTest.cpp +++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp @@ -553,6 +553,10 @@ TEST(VirtualFileSystemTest, PhysicalFileSystemWorkingDirFailure) { // Some platforms (e.g. Solaris) disallow removal of the working directory. GTEST_SKIP() << "test requires deletion of working directory"; +#ifdef __CYGWIN__ + GTEST_SKIP() << "Cygwin getcwd succeeds with unlinked working directory"; +#endif + // Verify that we still get two separate working directories. 
auto FS1 = vfs::createPhysicalFileSystem(); auto FS2 = vfs::createPhysicalFileSystem(); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp index 118bf67..7471355 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp @@ -41,7 +41,8 @@ protected: AARes.reset(new AAResults(*TLI)); AARes->addAAResult(*BasicAA); PSE.reset(new PredicatedScalarEvolution(*SE, *L)); - LAI.reset(new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI)); + LAI.reset( + new LoopAccessInfo(L, &*SE, nullptr, &*TLI, &*AARes, &*DT, &*LI, &*AC)); IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI)); IAI->analyzeInterleaving(false); return {Plan, *IAI}; diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index 9e06c42..b698b28 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -321,6 +321,8 @@ TEST_F(VPVerifierTest, NonHeaderPHIInHeader) { ::testing::internal::GetCapturedStderr().c_str()); #endif #endif + + delete PHINode; } class VPIRVerifierTest : public VPlanTestIRBase {}; diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 7fe2da8..937b34a6 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -1659,13 +1659,22 @@ def EmitC_FieldOp : EmitC_Op<"field", [Symbol]> { emitc.field @fieldName0 : !emitc.array<1xf32> {emitc.opaque = "another_feature"} // Example with no attribute: emitc.field @fieldName0 : !emitc.array<1xf32> + // Example with an initial value: + emitc.field @fieldName0 : !emitc.array<1xf32> = dense<0.0> + // Example with an initial value and attributes: + emitc.field @fieldName0 : !emitc.array<1xf32> = dense<0.0> { + emitc.opaque = "input_tensor"} ``` }]; 
let arguments = (ins SymbolNameAttr:$sym_name, TypeAttr:$type, - OptionalAttr<AnyAttr>:$attrs); + OptionalAttr<EmitC_OpaqueOrTypedAttr>:$initial_value); - let assemblyFormat = [{ $sym_name `:` $type ($attrs^)? attr-dict}]; + let assemblyFormat = [{ + $sym_name + `:` custom<EmitCFieldOpTypeAndInitialValue>($type, $initial_value) + attr-dict + }]; let hasVerifier = 1; } @@ -1686,7 +1695,7 @@ def EmitC_GetFieldOp }]; let arguments = (ins FlatSymbolRefAttr:$field_name); - let results = (outs AnyTypeOf<[EmitC_ArrayType, EmitC_LValueType]>:$result); + let results = (outs EmitCType:$result); let assemblyFormat = "$field_name `:` type($result) attr-dict"; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 45a8904..30df3b7 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -1990,10 +1990,30 @@ def NVVM_WMMAMmaOp : NVVM_Op<"wmma.mma">, let hasVerifier = 1; } -def NVVM_StMatrixOp: NVVM_PTXBuilder_Op<"stmatrix">, - Arguments<(ins LLVM_PointerShared:$ptr, - Variadic<I32>:$sources, - MMALayoutAttr:$layout)> { +def LdStMatrixShapeAttr : NVVM_Attr<"LdStMatrixShape", "ld_st_matrix_shape"> { + let summary = "Matrix shape for ldmatrix and stmatrix"; + let parameters = (ins "int":$m, "int":$n); + let assemblyFormat = "`<` struct(params) `>`"; +} + +def LdStMatrixEltTypeB16 : I32EnumAttrCase<"B16", 0, "b16">; +def LdStMatrixEltTypeB8 : I32EnumAttrCase<"B8", 1, "b8">; +def LdStMatrixEltTypeB8X16_B6X16_P32 : I32EnumAttrCase<"B8X16_B6X16_P32", 2, "b8x16.b6x16_p32">; +def LdStMatrixEltTypeB8X16_B4X16_P64 : I32EnumAttrCase<"B8X16_B4X16_P64", 3, "b8x16.b4x16_p64">; + +def LdStMatrixEltType : I32EnumAttr<"LdStMatrixEltType", "Element type for ldmatrix and stmatrix", + [LdStMatrixEltTypeB16, LdStMatrixEltTypeB8, + LdStMatrixEltTypeB8X16_B6X16_P32, LdStMatrixEltTypeB8X16_B4X16_P64]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::NVVM"; +} +def 
LdStMatrixEltTypeAttr : EnumAttr<NVVM_Dialect, LdStMatrixEltType, "ld_st_matrix_elt_type"> { + let assemblyFormat = "`<` $value `>`"; +} + +def NVVM_StMatrixOp: NVVM_Op<"stmatrix">, + Arguments<(ins LLVM_PointerShared: $ptr, Variadic<I32>:$sources, MMALayoutAttr:$layout, + LdStMatrixShapeAttr:$shape, LdStMatrixEltTypeAttr:$eltType)> { let summary = "cooperative matrix store"; let description = [{ Collectively store one or more matrices across all threads in a warp to the @@ -2001,21 +2021,12 @@ def NVVM_StMatrixOp: NVVM_PTXBuilder_Op<"stmatrix">, [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#warp-level-matrix-store-instruction-stmatrix) }]; - - let assemblyFormat = "$ptr `,` $sources attr-dict `:` type(operands)"; - let extraClassDefinition = [{ - std::string $cppClass::getPtx() { - int d = getSources().size(); - std::string ptx = "stmatrix.sync.aligned"; - ptx += ".x" + std::to_string(d); - if (getLayout() == NVVM::MMALayout::col) - ptx += ".trans"; - if(d == 1) ptx += ".m8n8.shared.b16 [%0], {%1};"; - if(d == 2) ptx += ".m8n8.shared.b16 [%0], {%1, %2};"; - if(d == 4) ptx += ".m8n8.shared.b16 [%0], {%1, %2, %3, %4};"; - return ptx; - } + string llvmBuilder = [{ + auto operands = moduleTranslation.lookupValues(opInst.getOperands()); + auto intId = getStMatrixIntrinsicId($layout, $sources.size(), $shape, $eltType); + createIntrinsicCall(builder, intId, operands, operands[0]->getType()); }]; + let assemblyFormat = "$ptr `,` $sources attr-dict `:` type(operands)"; let hasVerifier = 1; } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index e1e99c3..18d5f2d 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1532,6 +1532,10 @@ def OpenACC_ParallelOp : OpenACC_Op<"parallel", /// types. 
void addWaitOperands(MLIRContext *, bool hasDevnum, mlir::ValueRange, llvm::ArrayRef<DeviceType>); + + /// Adds a private clause variable to this operation, including its recipe. + void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe); }]; let assemblyFormat = [{ @@ -1674,6 +1678,9 @@ def OpenACC_SerialOp : OpenACC_Op<"serial", /// types. void addWaitOperands(MLIRContext *, bool hasDevnum, mlir::ValueRange, llvm::ArrayRef<DeviceType>); + /// Adds a private clause variable to this operation, including its recipe. + void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe); }]; let assemblyFormat = [{ @@ -2396,6 +2403,10 @@ def OpenACC_LoopOp : OpenACC_Op<"loop", // This first checks if the mode is set for the device_type requested. // And if not, it returns the non-device_type mode. LoopParMode getDefaultOrDeviceTypeParallelism(DeviceType); + + /// Adds a private clause variable to this operation, including its recipe. 
+ void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe); }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td index 9c74cff0..bdfd728 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -405,6 +405,7 @@ def SPV_INTEL_memory_access_aliasing : I32EnumAttrCase<"SPV_INTEL_me def SPV_INTEL_split_barrier : I32EnumAttrCase<"SPV_INTEL_split_barrier", 4029>; def SPV_INTEL_bfloat16_conversion : I32EnumAttrCase<"SPV_INTEL_bfloat16_conversion", 4031>; def SPV_INTEL_cache_controls : I32EnumAttrCase<"SPV_INTEL_cache_controls", 4032>; +def SPV_INTEL_tensor_float32_conversion : I32EnumAttrCase<"SPV_INTEL_tensor_float32_conversion", 4033>; def SPV_NV_compute_shader_derivatives : I32EnumAttrCase<"SPV_NV_compute_shader_derivatives", 5000>; def SPV_NV_cooperative_matrix : I32EnumAttrCase<"SPV_NV_cooperative_matrix", 5001>; @@ -468,6 +469,7 @@ def SPIRV_ExtensionAttr : SPV_INTEL_debug_module, SPV_INTEL_fp_fast_math_mode, SPV_INTEL_memory_access_aliasing, SPV_INTEL_split_barrier, SPV_INTEL_bfloat16_conversion, SPV_INTEL_cache_controls, + SPV_INTEL_tensor_float32_conversion, SPV_NV_compute_shader_derivatives, SPV_NV_cooperative_matrix, SPV_NV_fragment_shader_barycentric, SPV_NV_geometry_shader_passthrough, SPV_NV_ray_tracing, SPV_NV_sample_mask_override_coverage, @@ -1465,6 +1467,12 @@ def SPIRV_C_Bfloat16ConversionINTEL : I32EnumAttrCase<"B ]; } +def SPIRV_C_TensorFloat32RoundingINTEL : I32EnumAttrCase<"TensorFloat32RoundingINTEL", 6425> { + list<Availability> availability = [ + Extension<[SPV_INTEL_tensor_float32_conversion]> + ]; +} + def SPIRV_C_CacheControlsINTEL : I32EnumAttrCase<"CacheControlsINTEL", 6441> { list<Availability> availability = [ Extension<[SPV_INTEL_cache_controls]> @@ -1567,7 +1575,8 @@ def SPIRV_CapabilityAttr : SPIRV_C_ShaderViewportIndexLayerEXT, 
SPIRV_C_ShaderViewportMaskNV, SPIRV_C_ShaderStereoViewNV, SPIRV_C_Bfloat16ConversionINTEL, SPIRV_C_CacheControlsINTEL, SPIRV_C_BFloat16TypeKHR, - SPIRV_C_BFloat16DotProductKHR, SPIRV_C_BFloat16CooperativeMatrixKHR + SPIRV_C_BFloat16DotProductKHR, SPIRV_C_BFloat16CooperativeMatrixKHR, + SPIRV_C_TensorFloat32RoundingINTEL ]>; def SPIRV_AM_Logical : I32EnumAttrCase<"Logical", 0>; @@ -4277,7 +4286,7 @@ class SPIRV_MatrixOfType<list<Type> allowedTypes> : "Matrix">; class SPIRV_VectorOf<Type type> : - VectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>; + FixedVectorOfLengthAndType<[2, 3, 4, 8, 16], [type]>; class SPIRV_ScalarOrVectorOf<Type type> : AnyTypeOf<[type, SPIRV_VectorOf<type>]>; @@ -4587,6 +4596,7 @@ def SPIRV_OC_OpControlBarrierArriveINTEL : I32EnumAttrCase<"OpControlBarrie def SPIRV_OC_OpControlBarrierWaitINTEL : I32EnumAttrCase<"OpControlBarrierWaitINTEL", 6143>; def SPIRV_OC_OpGroupIMulKHR : I32EnumAttrCase<"OpGroupIMulKHR", 6401>; def SPIRV_OC_OpGroupFMulKHR : I32EnumAttrCase<"OpGroupFMulKHR", 6402>; +def SPIRV_OC_OpRoundFToTF32INTEL : I32EnumAttrCase<"OpRoundFToTF32INTEL", 6426>; def SPIRV_OpcodeAttr : SPIRV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", "opcode", [ @@ -4692,7 +4702,8 @@ def SPIRV_OpcodeAttr : SPIRV_OC_OpAssumeTrueKHR, SPIRV_OC_OpAtomicFAddEXT, SPIRV_OC_OpConvertFToBF16INTEL, SPIRV_OC_OpConvertBF16ToFINTEL, SPIRV_OC_OpControlBarrierArriveINTEL, SPIRV_OC_OpControlBarrierWaitINTEL, - SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR + SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR, + SPIRV_OC_OpRoundFToTF32INTEL ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! 
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td index 82d26e3..2a7fa53 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td @@ -11,6 +11,7 @@ // at (https://github.com/intel/llvm) // Supported extensions // * SPV_INTEL_bfloat16_conversion +// * SPV_INTEL_tensor_float32_conversion //===----------------------------------------------------------------------===// @@ -19,7 +20,7 @@ // ----- -def SPIRV_INTELConvertFToBF16Op : SPIRV_IntelVendorOp<"ConvertFToBF16", []> { +def SPIRV_INTELConvertFToBF16Op : SPIRV_IntelVendorOp<"ConvertFToBF16", [SameOperandsAndResultShape]> { let summary = "See extension SPV_INTEL_bfloat16_conversion"; let description = [{ @@ -58,16 +59,17 @@ def SPIRV_INTELConvertFToBF16Op : SPIRV_IntelVendorOp<"ConvertFToBF16", []> { let results = (outs SPIRV_ScalarOrVectorOf<SPIRV_Int16>:$result ); + let assemblyFormat = [{ $operand attr-dict `:` type($operand) `to` type($result) }]; - let hasVerifier = 1; + let hasVerifier = 0; } // ----- -def SPIRV_INTELConvertBF16ToFOp : SPIRV_IntelVendorOp<"ConvertBF16ToF", []> { +def SPIRV_INTELConvertBF16ToFOp : SPIRV_IntelVendorOp<"ConvertBF16ToF", [SameOperandsAndResultShape]> { let summary = "See extension SPV_INTEL_bfloat16_conversion"; let description = [{ @@ -107,9 +109,57 @@ def SPIRV_INTELConvertBF16ToFOp : SPIRV_IntelVendorOp<"ConvertBF16ToF", []> { let assemblyFormat = [{ $operand attr-dict `:` type($operand) `to` type($result) }]; - let hasVerifier = 1; + + let hasVerifier = 0; } +// ----- + +def SPIRV_INTELRoundFToTF32Op : SPIRV_IntelVendorOp<"RoundFToTF32", [SameOperandsAndResultShape]> { + let summary = "See extension SPV_INTEL_tensor_float32_conversion"; + + let description = [{ + Convert value numerically from a 32-bit floating point type to tensor float32, + with rounding to the nearest even. 
+ + Result Type must be a scalar or vector of 32-bit floating-point type. + The component width must be 32 bits. Bit pattern in the Result represents a tensor float32 value. + + Float Value must be a scalar or vector of floating-point type. + It must have the same number of components as Result Type. The component width must be 32 bits. + + Results are computed per component. + + #### Example: + + ```mlir + %1 = spirv.RoundFToTF32 %0 : f32 to f32 + %3 = spirv.RoundFToTF32 %2 : vector<3xf32> to vector<3xf32> + ``` + + }]; + + let availability = [ + MinVersion<SPIRV_V_1_0>, + MaxVersion<SPIRV_V_1_6>, + Extension<[SPV_INTEL_tensor_float32_conversion]>, + Capability<[SPIRV_C_TensorFloat32RoundingINTEL]> + ]; + + let arguments = (ins + SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$operand + ); + + let results = (outs + SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$result + ); + + let assemblyFormat = [{ + $operand attr-dict `:` type($operand) `to` type($result) + }]; + + let hasVerifier = 0; +} // ----- diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 5d45508..dc55704 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -2780,6 +2780,10 @@ def Vector_SplatOp : Vector_Op<"splat", [ let assemblyFormat = "$input attr-dict `:` type($aggregate)"; let hasFolder = 1; + + // vector.splat is deprecated, and vector.broadcast should be used instead. + // Canonicalize vector.splat to vector.broadcast. 
+ let hasCanonicalizer = 1; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h index edc8ab4..4f89f8b 100644 --- a/mlir/include/mlir/IR/Operation.h +++ b/mlir/include/mlir/IR/Operation.h @@ -1125,6 +1125,26 @@ inline raw_ostream &operator<<(raw_ostream &os, return os; } +/// A wrapper class that allows for printing an operation with a custom +/// AsmState, useful to act as a "stream modifier" to customize printing an +/// operation with a stream using the operator<< overload, e.g.: +/// llvm::dbgs() << OpWithState(op, OpPrintingFlags().skipRegions()); +class OpWithState { +public: + OpWithState(Operation *op, AsmState &state) : op(op), theState(state) {} + +private: + Operation *op; + AsmState &theState; + friend raw_ostream &operator<<(raw_ostream &os, const OpWithState &op); +}; + +inline raw_ostream &operator<<(raw_ostream &os, + const OpWithState &opWithState) { + opWithState.op->print(os, const_cast<OpWithState &>(opWithState).theState); + return os; +} + } // namespace mlir namespace llvm { diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp index 1817861..3545acb 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -385,6 +385,14 @@ LogicalResult GPUModuleConversion::matchAndRewrite( if (auto attr = moduleOp->getAttrOfType<spirv::TargetEnvAttr>( spirv::getTargetEnvAttrName())) spvModule->setAttr(spirv::getTargetEnvAttrName(), attr); + if (ArrayAttr targets = moduleOp.getTargetsAttr()) { + for (Attribute targetAttr : targets) + if (auto spirvTargetEnvAttr = + dyn_cast<spirv::TargetEnvAttr>(targetAttr)) { + spvModule->setAttr(spirv::getTargetEnvAttrName(), spirvTargetEnvAttr); + break; + } + } rewriter.eraseOp(moduleOp); return success(); diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp 
b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp index a344f88..5eab057 100644 --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRVPass.cpp @@ -48,9 +48,36 @@ struct GPUToSPIRVPass final : impl::ConvertGPUToSPIRVBase<GPUToSPIRVPass> { void runOnOperation() override; private: + /// Queries the target environment from 'targets' attribute of the given + /// `moduleOp`. + spirv::TargetEnvAttr lookupTargetEnvInTargets(gpu::GPUModuleOp moduleOp); + + /// Queries the target environment from 'targets' attribute of the given + /// `moduleOp` or returns target environment as returned by + /// `spirv::lookupTargetEnvOrDefault` if not provided by 'targets'. + spirv::TargetEnvAttr lookupTargetEnvOrDefault(gpu::GPUModuleOp moduleOp); bool mapMemorySpace; }; +spirv::TargetEnvAttr +GPUToSPIRVPass::lookupTargetEnvInTargets(gpu::GPUModuleOp moduleOp) { + if (ArrayAttr targets = moduleOp.getTargetsAttr()) { + for (Attribute targetAttr : targets) + if (auto spirvTargetEnvAttr = dyn_cast<spirv::TargetEnvAttr>(targetAttr)) + return spirvTargetEnvAttr; + } + + return {}; +} + +spirv::TargetEnvAttr +GPUToSPIRVPass::lookupTargetEnvOrDefault(gpu::GPUModuleOp moduleOp) { + if (spirv::TargetEnvAttr targetEnvAttr = lookupTargetEnvInTargets(moduleOp)) + return targetEnvAttr; + + return spirv::lookupTargetEnvOrDefault(moduleOp); +} + void GPUToSPIRVPass::runOnOperation() { MLIRContext *context = &getContext(); ModuleOp module = getOperation(); @@ -58,9 +85,8 @@ void GPUToSPIRVPass::runOnOperation() { SmallVector<Operation *, 1> gpuModules; OpBuilder builder(context); - auto targetEnvSupportsKernelCapability = [](gpu::GPUModuleOp moduleOp) { - Operation *gpuModule = moduleOp.getOperation(); - auto targetAttr = spirv::lookupTargetEnvOrDefault(gpuModule); + auto targetEnvSupportsKernelCapability = [this](gpu::GPUModuleOp moduleOp) { + auto targetAttr = lookupTargetEnvOrDefault(moduleOp); spirv::TargetEnv targetEnv(targetAttr); return 
targetEnv.allows(spirv::Capability::Kernel); }; @@ -86,7 +112,7 @@ void GPUToSPIRVPass::runOnOperation() { // TargetEnv attributes. for (Operation *gpuModule : gpuModules) { spirv::TargetEnvAttr targetAttr = - spirv::lookupTargetEnvOrDefault(gpuModule); + lookupTargetEnvOrDefault(cast<gpu::GPUModuleOp>(gpuModule)); // Map MemRef memory space to SPIR-V storage class first if requested. if (mapMemorySpace) { diff --git a/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp index 807be7e..ba448e4 100644 --- a/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp +++ b/mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp @@ -312,6 +312,19 @@ struct ForallLowering : public OpRewritePattern<mlir::scf::ForallOp> { } // namespace +static void propagateLoopAttrs(Operation *scfOp, Operation *brOp) { + // Let the CondBranchOp carry the LLVM attributes from the ForOp, such as the + // llvm.loop_annotation attribute. + // LLVM requires the loop metadata to be attached on the "latch" block. Which + // is the back-edge to the header block (conditionBlock) + SmallVector<NamedAttribute> llvmAttrs; + llvm::copy_if(scfOp->getAttrs(), std::back_inserter(llvmAttrs), + [](auto attr) { + return isa<LLVM::LLVMDialect>(attr.getValue().getDialect()); + }); + brOp->setDiscardableAttrs(llvmAttrs); +} + LogicalResult ForLowering::matchAndRewrite(ForOp forOp, PatternRewriter &rewriter) const { Location loc = forOp.getLoc(); @@ -350,17 +363,7 @@ LogicalResult ForLowering::matchAndRewrite(ForOp forOp, auto branchOp = cf::BranchOp::create(rewriter, loc, conditionBlock, loopCarried); - // Let the CondBranchOp carry the LLVM attributes from the ForOp, such as the - // llvm.loop_annotation attribute. - // LLVM requires the loop metadata to be attached on the "latch" block. 
Which - // is the back-edge to the header block (conditionBlock) - SmallVector<NamedAttribute> llvmAttrs; - llvm::copy_if(forOp->getAttrs(), std::back_inserter(llvmAttrs), - [](auto attr) { - return isa<LLVM::LLVMDialect>(attr.getValue().getDialect()); - }); - branchOp->setDiscardableAttrs(llvmAttrs); - + propagateLoopAttrs(forOp, branchOp); rewriter.eraseOp(terminator); // Compute loop bounds before branching to the condition. @@ -589,9 +592,10 @@ LogicalResult WhileLowering::matchAndRewrite(WhileOp whileOp, rewriter.setInsertionPointToEnd(after); auto yieldOp = cast<scf::YieldOp>(after->getTerminator()); - rewriter.replaceOpWithNewOp<cf::BranchOp>(yieldOp, before, - yieldOp.getResults()); + auto latch = rewriter.replaceOpWithNewOp<cf::BranchOp>(yieldOp, before, + yieldOp.getResults()); + propagateLoopAttrs(whileOp, latch); // Replace the op with values "yielded" from the "before" region, which are // visible by dominance. rewriter.replaceOp(whileOp, args); @@ -631,10 +635,11 @@ DoWhileLowering::matchAndRewrite(WhileOp whileOp, // Loop around the "before" region based on condition. rewriter.setInsertionPointToEnd(before); auto condOp = cast<ConditionOp>(before->getTerminator()); - cf::CondBranchOp::create(rewriter, condOp.getLoc(), condOp.getCondition(), - before, condOp.getArgs(), continuation, - ValueRange()); + auto latch = cf::CondBranchOp::create( + rewriter, condOp.getLoc(), condOp.getCondition(), before, + condOp.getArgs(), continuation, ValueRange()); + propagateLoopAttrs(whileOp, latch); // Replace the op with values "yielded" from the "before" region, which are // visible by dominance. 
rewriter.replaceOp(whileOp, condOp.getArgs()); diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index 4c09022..e6a3154 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -1398,6 +1398,45 @@ void FileOp::build(OpBuilder &builder, OperationState &state, StringRef id) { //===----------------------------------------------------------------------===// // FieldOp //===----------------------------------------------------------------------===// +static void printEmitCFieldOpTypeAndInitialValue(OpAsmPrinter &p, FieldOp op, + TypeAttr type, + Attribute initialValue) { + p << type; + if (initialValue) { + p << " = "; + p.printAttributeWithoutType(initialValue); + } +} + +static Type getInitializerTypeForField(Type type) { + if (auto array = llvm::dyn_cast<ArrayType>(type)) + return RankedTensorType::get(array.getShape(), array.getElementType()); + return type; +} + +static ParseResult +parseEmitCFieldOpTypeAndInitialValue(OpAsmParser &parser, TypeAttr &typeAttr, + Attribute &initialValue) { + Type type; + if (parser.parseType(type)) + return failure(); + + typeAttr = TypeAttr::get(type); + + if (parser.parseOptionalEqual()) + return success(); + + if (parser.parseAttribute(initialValue, getInitializerTypeForField(type))) + return failure(); + + if (!llvm::isa<ElementsAttr, IntegerAttr, FloatAttr, emitc::OpaqueAttr>( + initialValue)) + return parser.emitError(parser.getNameLoc()) + << "initial value should be a integer, float, elements or opaque " + "attribute"; + return success(); +} + LogicalResult FieldOp::verify() { if (!isSupportedEmitCType(getType())) return emitOpError("expected valid emitc type"); @@ -1410,9 +1449,6 @@ LogicalResult FieldOp::verify() { if (!symName || symName.getValue().empty()) return emitOpError("field must have a non-empty symbol name"); - if (!getAttrs()) - return success(); - return success(); } diff --git a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp 
b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp index fa05ad8..c55e26e 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp +++ b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp @@ -58,17 +58,18 @@ public: auto argAttrs = funcOp.getArgAttrs(); for (auto [idx, val] : llvm::enumerate(funcOp.getArguments())) { - StringAttr fieldName; - Attribute argAttr = nullptr; - - fieldName = rewriter.getStringAttr("fieldName" + std::to_string(idx)); - if (argAttrs && idx < argAttrs->size()) - argAttr = (*argAttrs)[idx]; + StringAttr fieldName = + rewriter.getStringAttr("fieldName" + std::to_string(idx)); TypeAttr typeAttr = TypeAttr::get(val.getType()); fields.push_back({fieldName, typeAttr}); - emitc::FieldOp::create(rewriter, funcOp.getLoc(), fieldName, typeAttr, - argAttr); + + FieldOp fieldop = rewriter.create<emitc::FieldOp>( + funcOp->getLoc(), fieldName, typeAttr, nullptr); + + if (argAttrs && idx < argAttrs->size()) { + fieldop->setDiscardableAttrs(funcOp.getArgAttrDict(idx)); + } } rewriter.setInsertionPointToEnd(&newClassOp.getBody().front()); diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 52cd0ce..e0977f5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -813,15 +813,26 @@ LogicalResult NVVM::LdMatrixOp::verify() { } LogicalResult NVVM::StMatrixOp::verify() { - unsigned addressSpace = - llvm::cast<LLVM::LLVMPointerType>(getPtr().getType()).getAddressSpace(); - if (addressSpace != NVVM::kSharedMemorySpace) - return emitOpError("expected source pointer in memory space 3"); - int numMatrix = getSources().size(); if (numMatrix != 1 && numMatrix != 2 && numMatrix != 4) return emitOpError("expected num attribute to be 1, 2 or 4"); + int m = getShape().getM(), n = getShape().getN(); + if (m == 8 && n == 8) { + if (getEltType() != NVVM::LdStMatrixEltType::B16) { + return emitOpError("expected element type to be B16 for 8x8 matrix"); 
+ } + } else if (m == 16 && n == 8) { + if (getEltType() != NVVM::LdStMatrixEltType::B8) { + return emitOpError("expected element type to be B8 for 16x8 matrix"); + } + if (getLayout() != NVVM::MMALayout::col) { + return emitOpError("expected layout to be col for 16x8 matrix"); + } + } else { + return emitOpError("expected shape to be 8x8 or 16x8"); + } + return success(); } diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index ea68b1a..0860cea 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1831,6 +1831,53 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, linalg::PackOp packOp, return success(); } +/// Given the re-associations, "collapses" the input Vector type +/// +/// This is similar to CollapseShapeOp::inferCollapsedType with two notable +/// differences: +/// * We can safely assume that there are no dynamic sizes. +/// * Scalable flags are updated alongside regular dims. +/// +/// When collapsing scalable flags, conservatively avoids cases with two +/// scalable dims. We could re-visit this in the future. +/// +/// EXAMPLE: +/// type = vector<4x16x[8]x16xf32> +/// reassociation = [(d0, d1, d2, d3) -> (d0, d1), +/// (d0, d1, d2, d3) -> (d2, d3)] +/// Result: +/// vector<64x[128]xf32> +static VectorType getCollapsedVecType(VectorType type, + ArrayRef<AffineMap> reassociation) { + assert(type.getNumScalableDims() < 2 && + "Collapsing more than 1 scalable dim is not supported ATM"); + + // Use the fact that reassociation is valid to simplify the logic: only use + // each map's rank. 
+ assert(isReassociationValid(reassociation) && "invalid reassociation"); + + auto shape = type.getShape(); + auto scalableFlags = type.getScalableDims(); + SmallVector<int64_t> newShape; + SmallVector<bool> newScalableFlags; + + unsigned currentDim = 0; + for (AffineMap m : reassociation) { + unsigned dim = m.getNumResults(); + int64_t size = 1; + bool flag = false; + for (unsigned d = 0; d < dim; ++d) { + size *= shape[currentDim + d]; + flag |= scalableFlags[currentDim + d]; + } + newShape.push_back(size); + newScalableFlags.push_back(flag); + currentDim += dim; + } + + return VectorType::get(newShape, type.getElementType(), newScalableFlags); +} + /// Vectorize a `linalg::UnPackOp` to these 4 Ops: /// Vector::TransferReadOp - Reads a vector from the source tensor /// vector::TransposeOp - Transpose the Source tensor @@ -1928,23 +1975,17 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, linalg::UnPackOp unpackOp, PackingMetadata packMetadata; SmallVector<int64_t> lastDimToInsertPosPerm = getUnPackInverseSrcPerm(unpackOp, packMetadata); - ShapedType maskedOpShapedType = cast<ShapedType>(readResult.getType()); - SmallVector<int64_t> stripMineShape(maskedOpShapedType.getShape()); - mlir::Type stripMineElemType = maskedOpShapedType.getElementType(); - applyPermutationToVector(stripMineShape, lastDimToInsertPosPerm); - RankedTensorType stripMineTensorType = - RankedTensorType::get(stripMineShape, stripMineElemType); // Transpose the appropriate rows to match output. vector::TransposeOp transposeOp = vector::TransposeOp::create( rewriter, loc, readResult, lastDimToInsertPosPerm); // Collapse the vector to the size required by result. 
- RankedTensorType collapsedType = tensor::CollapseShapeOp::inferCollapsedType( - stripMineTensorType, packMetadata.reassociations); - mlir::VectorType vecCollapsedType = - VectorType::get(collapsedType.getShape(), collapsedType.getElementType()); + VectorType collapsedVecType = getCollapsedVecType( + transposeOp.getType(), + getSymbolLessAffineMaps(convertReassociationIndicesToExprs( + rewriter.getContext(), packMetadata.reassociations))); vector::ShapeCastOp shapeCastOp = vector::ShapeCastOp::create( - rewriter, loc, vecCollapsedType, transposeOp->getResult(0)); + rewriter, loc, collapsedVecType, transposeOp->getResult(0)); Operation *write = createWriteOrMaskedWrite( rewriter, loc, shapeCastOp.getResult(), unpackOp.getDest(), diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 9d5dfc1..485bb73 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -1375,6 +1375,21 @@ void acc::ParallelOp::addWaitOperands( setHasWaitDevnumAttr(mlir::ArrayAttr::get(context, hasDevnums)); } +void acc::ParallelOp::addPrivatization(MLIRContext *context, + mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe) { + getPrivateOperandsMutable().append(op.getResult()); + + llvm::SmallVector<mlir::Attribute> recipes; + + if (getPrivatizationRecipesAttr()) + llvm::copy(getPrivatizationRecipesAttr(), std::back_inserter(recipes)); + + recipes.push_back( + mlir::SymbolRefAttr::get(context, recipe.getSymName().str())); + setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); +} + static ParseResult parseNumGangs( mlir::OpAsmParser &parser, llvm::SmallVectorImpl<mlir::OpAsmParser::UnresolvedOperand> &operands, @@ -2011,6 +2026,21 @@ void acc::SerialOp::addWaitOperands( setHasWaitDevnumAttr(mlir::ArrayAttr::get(context, hasDevnums)); } +void acc::SerialOp::addPrivatization(MLIRContext *context, + mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe) { + 
getPrivateOperandsMutable().append(op.getResult()); + + llvm::SmallVector<mlir::Attribute> recipes; + + if (getPrivatizationRecipesAttr()) + llvm::copy(getPrivatizationRecipesAttr(), std::back_inserter(recipes)); + + recipes.push_back( + mlir::SymbolRefAttr::get(context, recipe.getSymName().str())); + setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); +} + //===----------------------------------------------------------------------===// // KernelsOp //===----------------------------------------------------------------------===// @@ -3014,6 +3044,21 @@ void acc::LoopOp::addGangOperands( } } +void acc::LoopOp::addPrivatization(MLIRContext *context, + mlir::acc::PrivateOp op, + mlir::acc::PrivateRecipeOp recipe) { + getPrivateOperandsMutable().append(op.getResult()); + + llvm::SmallVector<mlir::Attribute> recipes; + + if (getPrivatizationRecipesAttr()) + llvm::copy(getPrivatizationRecipesAttr(), std::back_inserter(recipes)); + + recipes.push_back( + mlir::SymbolRefAttr::get(context, recipe.getSymName().str())); + setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes)); +} + //===----------------------------------------------------------------------===// // DataOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp index e27dc27..fcf4eb6 100644 --- a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp @@ -270,48 +270,6 @@ LogicalResult ConvertUToFOp::verify() { } //===----------------------------------------------------------------------===// -// spirv.INTELConvertBF16ToFOp -//===----------------------------------------------------------------------===// - -LogicalResult INTELConvertBF16ToFOp::verify() { - auto operandType = getOperand().getType(); - auto resultType = getResult().getType(); - // ODS checks that vector result type and vector operand type have the same - // shape. 
- if (auto vectorType = llvm::dyn_cast<VectorType>(operandType)) { - unsigned operandNumElements = vectorType.getNumElements(); - unsigned resultNumElements = - llvm::cast<VectorType>(resultType).getNumElements(); - if (operandNumElements != resultNumElements) { - return emitOpError( - "operand and result must have same number of elements"); - } - } - return success(); -} - -//===----------------------------------------------------------------------===// -// spirv.INTELConvertFToBF16Op -//===----------------------------------------------------------------------===// - -LogicalResult INTELConvertFToBF16Op::verify() { - auto operandType = getOperand().getType(); - auto resultType = getResult().getType(); - // ODS checks that vector result type and vector operand type have the same - // shape. - if (auto vectorType = llvm::dyn_cast<VectorType>(operandType)) { - unsigned operandNumElements = vectorType.getNumElements(); - unsigned resultNumElements = - llvm::cast<VectorType>(resultType).getNumElements(); - if (operandNumElements != resultNumElements) { - return emitOpError( - "operand and result must have same number of elements"); - } - } - return success(); -} - -//===----------------------------------------------------------------------===// // spirv.FConvertOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 52c672a..f993398 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -767,19 +767,25 @@ void mlir::spirv::AddressOfOp::getAsmResultNames( // spirv.EXTConstantCompositeReplicate //===----------------------------------------------------------------------===// +// Returns type of attribute. In case of a TypedAttr this will simply return +// the type. But for an ArrayAttr which is untyped and can be multidimensional +// it creates the ArrayType recursively. 
+static Type getValueType(Attribute attr) { + if (auto typedAttr = dyn_cast<TypedAttr>(attr)) { + return typedAttr.getType(); + } + + if (auto arrayAttr = dyn_cast<ArrayAttr>(attr)) { + return spirv::ArrayType::get(getValueType(arrayAttr[0]), arrayAttr.size()); + } + + return nullptr; +} + LogicalResult spirv::EXTConstantCompositeReplicateOp::verify() { - Type valueType; - if (auto typedAttr = dyn_cast<TypedAttr>(getValue())) { - valueType = typedAttr.getType(); - } else if (auto arrayAttr = dyn_cast<ArrayAttr>(getValue())) { - auto typedElemAttr = dyn_cast<TypedAttr>(arrayAttr[0]); - if (!typedElemAttr) - return emitError("value attribute is not typed"); - valueType = - spirv::ArrayType::get(typedElemAttr.getType(), arrayAttr.size()); - } else { + Type valueType = getValueType(getValue()); + if (!valueType) return emitError("unknown value attribute type"); - } auto compositeType = dyn_cast<spirv::CompositeType>(getType()); if (!compositeType) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 6d2cbb5..e3cba388 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -452,18 +452,14 @@ struct ClampIsNoOp : public OpRewritePattern<tosa::ClampOp> { auto inputType = llvm::dyn_cast<RankedTensorType>(op.getInput().getType()); auto inputElementType = inputType.getElementType(); - if (!inputType.hasStaticShape()) { - return failure(); - } - if (isa<FloatType>(inputElementType)) { // Unlike integer types, floating point types can represent infinity. 
- auto minClamp = + const auto minClamp = llvm::cast<mlir::FloatAttr>(op.getMinValAttr()).getValue(); - auto maxClamp = + const auto maxClamp = llvm::cast<mlir::FloatAttr>(op.getMaxValAttr()).getValue(); - bool isMin = minClamp.isNegInfinity(); - bool isMax = maxClamp.isInfinity(); + const bool isMin = minClamp.isNegInfinity(); + const bool isMax = maxClamp.isInfinity(); if (isMin && isMax) { rewriter.replaceOp(op, input); @@ -472,18 +468,19 @@ struct ClampIsNoOp : public OpRewritePattern<tosa::ClampOp> { return failure(); } - if (inputElementType.isUnsignedInteger()) { - int64_t minClamp = - llvm::cast<mlir::IntegerAttr>(op.getMinValAttr()).getUInt(); - int64_t maxClamp = - llvm::cast<mlir::IntegerAttr>(op.getMaxValAttr()).getUInt(); + // i1 types are boolean in TOSA + const bool isBoolean = inputElementType.isInteger(1); + if (inputElementType.isUnsignedInteger() || isBoolean) { + const int64_t minClamp = llvm::cast<mlir::IntegerAttr>(op.getMinValAttr()) + .getValue() + .getZExtValue(); + const int64_t maxClamp = llvm::cast<mlir::IntegerAttr>(op.getMaxValAttr()) + .getValue() + .getZExtValue(); - int64_t intMin = - APInt::getMinValue(inputElementType.getIntOrFloatBitWidth()) - .getZExtValue(); - int64_t intMax = - APInt::getMaxValue(inputElementType.getIntOrFloatBitWidth()) - .getZExtValue(); + const unsigned bitWidth = inputElementType.getIntOrFloatBitWidth(); + const int64_t intMin = APInt::getMinValue(bitWidth).getZExtValue(); + const int64_t intMax = APInt::getMaxValue(bitWidth).getZExtValue(); if (minClamp <= intMin && maxClamp >= intMax) { rewriter.replaceOp(op, input); @@ -493,17 +490,14 @@ struct ClampIsNoOp : public OpRewritePattern<tosa::ClampOp> { } if (llvm::isa<IntegerType>(inputElementType)) { - int64_t minClamp = + const int64_t minClamp = llvm::cast<mlir::IntegerAttr>(op.getMinValAttr()).getInt(); - int64_t maxClamp = + const int64_t maxClamp = llvm::cast<mlir::IntegerAttr>(op.getMaxValAttr()).getInt(); - int64_t intMin = - 
APInt::getSignedMinValue(inputElementType.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputElementType.getIntOrFloatBitWidth()) - .getSExtValue(); + const unsigned bitWidth = inputElementType.getIntOrFloatBitWidth(); + const int64_t intMin = APInt::getSignedMinValue(bitWidth).getSExtValue(); + const int64_t intMax = APInt::getSignedMaxValue(bitWidth).getSExtValue(); if (minClamp <= intMin && maxClamp >= intMax) { rewriter.replaceOp(op, input); diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp index 8ec7765..c7b9534 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp @@ -1381,7 +1381,7 @@ void TosaValidation::runOnOperation() { // Some uses of TOSA rely on the constant operands of particular // operations. - if (strictOpSpecAlignment && failed(applyConstantOperandCheck(op))) + if (failed(applyConstantOperandCheck(op))) signalPassFailure(); // do level checks diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index a21b5ba..a450056 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -2476,17 +2476,19 @@ OpFoldResult FromElementsOp::fold(FoldAdaptor adaptor) { return {}; } -/// Rewrite a vector.from_elements into a vector.splat if all elements are the -/// same SSA value. E.g.: -/// -/// %0 = vector.from_elements %a, %a, %a : vector<3xf32> -/// ==> rewrite to vector.splat %a : vector<3xf32> -static LogicalResult rewriteFromElementsAsSplat(FromElementsOp fromElementsOp, - PatternRewriter &rewriter) { +/// Rewrite vector.from_elements as vector.broadcast if the elements are the +/// same. 
Example: +/// %0 = vector.from_elements %a, %a, %a : vector<3xf32> +/// => +/// %0 = vector.broadcast %a : f32 to vector<3xf32> +static LogicalResult +rewriteFromElementsAsBroadcast(FromElementsOp fromElementsOp, + PatternRewriter &rewriter) { if (!llvm::all_equal(fromElementsOp.getElements())) return failure(); - rewriter.replaceOpWithNewOp<SplatOp>(fromElementsOp, fromElementsOp.getType(), - fromElementsOp.getElements().front()); + rewriter.replaceOpWithNewOp<BroadcastOp>( + fromElementsOp, fromElementsOp.getType(), + fromElementsOp.getElements().front()); return success(); } @@ -2517,7 +2519,7 @@ class FromElementsToShapeCast : public OpRewritePattern<FromElementsOp> { LogicalResult matchAndRewrite(FromElementsOp fromElements, PatternRewriter &rewriter) const override { - // Handled by `rewriteFromElementsAsSplat` + // Handled by `rewriteFromElementsAsBroadcast`. if (fromElements.getType().getNumElements() == 1) return failure(); @@ -2610,7 +2612,7 @@ class FromElementsToShapeCast : public OpRewritePattern<FromElementsOp> { void FromElementsOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(rewriteFromElementsAsSplat); + results.add(rewriteFromElementsAsBroadcast); results.add<FromElementsToShapeCast>(context); } @@ -3058,23 +3060,50 @@ struct Canonicalize0DShuffleOp : public OpRewritePattern<ShuffleOp> { } }; -/// Pattern to rewrite a ShuffleOp(SplatOp, SplatOp) to SplatOp. +/// Consider the defining operation `defOp` of `value`. If `defOp` is a +/// vector.splat or a vector.broadcast with a scalar operand, return the scalar +/// value that is splatted. Otherwise return null. 
+/// +/// Examples: +/// +/// scalar_source --> vector.splat --> value - return scalar_source +/// scalar_source --> vector.broadcast --> value - return scalar_source +static Value getScalarSplatSource(Value value) { + // Block argument: + Operation *defOp = value.getDefiningOp(); + if (!defOp) + return {}; + + // Splat: + if (auto splat = dyn_cast<vector::SplatOp>(defOp)) + return splat.getInput(); + + auto broadcast = dyn_cast<vector::BroadcastOp>(defOp); + + // Not broadcast (and not splat): + if (!broadcast) + return {}; + + // Broadcast of a vector: + if (isa<VectorType>(broadcast.getSourceType())) + return {}; + + // Broadcast of a scalar: + return broadcast.getSource(); +} + +/// Pattern to rewrite shuffle(splat-like(v), splat-like(v)) as broadcast(v). class ShuffleSplat final : public OpRewritePattern<ShuffleOp> { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ShuffleOp op, PatternRewriter &rewriter) const override { - auto v1Splat = op.getV1().getDefiningOp<SplatOp>(); - auto v2Splat = op.getV2().getDefiningOp<SplatOp>(); - - if (!v1Splat || !v2Splat) + Value splat = getScalarSplatSource(op.getV1()); + if (!splat || getScalarSplatSource(op.getV2()) != splat) return failure(); - if (v1Splat.getInput() != v2Splat.getInput()) - return failure(); - - rewriter.replaceOpWithNewOp<SplatOp>(op, op.getType(), v1Splat.getInput()); + rewriter.replaceOpWithNewOp<BroadcastOp>(op, op.getType(), splat); return success(); } }; @@ -3230,23 +3259,19 @@ public: } }; -/// Pattern to rewrite a InsertOp(SplatOp, SplatOp) to SplatOp. +/// Pattern to rewrite a insert(splat-like(v), splat-like(v)) as broadcast(v). 
class InsertSplatToSplat final : public OpRewritePattern<InsertOp> { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(InsertOp op, PatternRewriter &rewriter) const override { - auto srcSplat = op.getValueToStore().getDefiningOp<SplatOp>(); - auto dstSplat = op.getDest().getDefiningOp<SplatOp>(); - - if (!srcSplat || !dstSplat) - return failure(); - if (srcSplat.getInput() != dstSplat.getInput()) + Value splat = getScalarSplatSource(op.getValueToStore()); + if (!splat || getScalarSplatSource(op.getDest()) != splat) return failure(); - rewriter.replaceOpWithNewOp<SplatOp>(op, op.getType(), srcSplat.getInput()); + rewriter.replaceOpWithNewOp<BroadcastOp>(op, op.getType(), splat); return success(); } }; @@ -3514,8 +3539,7 @@ LogicalResult InsertStridedSliceOp::verify() { } namespace { -/// Pattern to rewrite an InsertStridedSliceOp(SplatOp(X):src_type, -/// SplatOp(X):dst_type) to SplatOp(X):dst_type. +/// Rewrite insert_strided_slice(splat-like(v), splat-like(v)) as v. 
class FoldInsertStridedSliceSplat final : public OpRewritePattern<InsertStridedSliceOp> { public: @@ -3523,18 +3547,13 @@ public: LogicalResult matchAndRewrite(InsertStridedSliceOp insertStridedSliceOp, PatternRewriter &rewriter) const override { - auto srcSplatOp = - insertStridedSliceOp.getValueToStore().getDefiningOp<vector::SplatOp>(); - auto destSplatOp = - insertStridedSliceOp.getDest().getDefiningOp<vector::SplatOp>(); - if (!srcSplatOp || !destSplatOp) + auto dst = insertStridedSliceOp.getDest(); + auto splat = getScalarSplatSource(insertStridedSliceOp.getValueToStore()); + if (!splat || getScalarSplatSource(dst) != splat) return failure(); - if (srcSplatOp.getInput() != destSplatOp.getInput()) - return failure(); - - rewriter.replaceOp(insertStridedSliceOp, insertStridedSliceOp.getDest()); + rewriter.replaceOp(insertStridedSliceOp, dst); return success(); } }; @@ -4189,17 +4208,18 @@ public: } }; -/// Pattern to rewrite an ExtractStridedSliceOp(SplatOp) to SplatOp. +/// Rewrite extract_strided_slice(splat-like(v)) with broadcast(v). class StridedSliceSplat final : public OpRewritePattern<ExtractStridedSliceOp> { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ExtractStridedSliceOp op, PatternRewriter &rewriter) const override { - auto splat = op.getVector().getDefiningOp<SplatOp>(); + + Value splat = getScalarSplatSource(op.getVector()); if (!splat) return failure(); - rewriter.replaceOpWithNewOp<SplatOp>(op, op.getType(), splat.getInput()); + rewriter.replaceOpWithNewOp<BroadcastOp>(op, op.getType(), splat); return success(); } }; @@ -6354,19 +6374,19 @@ public: } }; -// Folds transpose(splat x : src_type) : res_type into splat x : res_type. 
+/// Replace transpose(splat-like(v)) with broadcast(v) class FoldTransposeSplat final : public OpRewritePattern<TransposeOp> { public: using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(TransposeOp transposeOp, PatternRewriter &rewriter) const override { - auto splatOp = transposeOp.getVector().getDefiningOp<vector::SplatOp>(); - if (!splatOp) + Value splat = getScalarSplatSource(transposeOp.getVector()); + if (!splat) return failure(); - rewriter.replaceOpWithNewOp<vector::SplatOp>( - transposeOp, transposeOp.getResultVectorType(), splatOp.getInput()); + rewriter.replaceOpWithNewOp<vector::BroadcastOp>( + transposeOp, transposeOp.getResultVectorType(), splat); return success(); } }; @@ -7117,6 +7137,23 @@ OpFoldResult SplatOp::fold(FoldAdaptor adaptor) { return SplatElementsAttr::get(getType(), {constOperand}); } +// Canonicalizer for vector.splat. It always gets canonicalized to a +// vector.broadcast. +class SplatToBroadcastPattern final : public OpRewritePattern<SplatOp> { +public: + using OpRewritePattern<SplatOp>::OpRewritePattern; + LogicalResult matchAndRewrite(SplatOp splatOp, + PatternRewriter &rewriter) const override { + rewriter.replaceOpWithNewOp<vector::BroadcastOp>(splatOp, splatOp.getType(), + splatOp.getOperand()); + return success(); + } +}; +void SplatOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results.add<SplatToBroadcastPattern>(context); +} + void SplatOp::inferResultRanges(ArrayRef<ConstantIntRanges> argRanges, SetIntRangeFn setResultRanges) { setResultRanges(getResult(), argRanges.front()); diff --git a/mlir/lib/Rewrite/PatternApplicator.cpp b/mlir/lib/Rewrite/PatternApplicator.cpp index b2b372b..e13bcff 100644 --- a/mlir/lib/Rewrite/PatternApplicator.cpp +++ b/mlir/lib/Rewrite/PatternApplicator.cpp @@ -13,7 +13,7 @@ #include "mlir/Rewrite/PatternApplicator.h" #include "ByteCode.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLog.h" #ifndef NDEBUG #include 
"llvm/ADT/ScopeExit.h" @@ -51,9 +51,7 @@ static Operation *getDumpRootOp(Operation *op) { return op; } static void logSucessfulPatternApplication(Operation *op) { - llvm::dbgs() << "// *** IR Dump After Pattern Application ***\n"; - op->dump(); - llvm::dbgs() << "\n\n"; + LDBG(2) << "// *** IR Dump After Pattern Application ***\n" << *op << "\n"; } #endif @@ -208,8 +206,8 @@ LogicalResult PatternApplicator::matchAndRewrite( result = bytecode->rewrite(rewriter, *pdlMatch, *mutableByteCodeState); } else { - LLVM_DEBUG(llvm::dbgs() << "Trying to match \"" - << bestPattern->getDebugName() << "\"\n"); + LDBG() << "Trying to match \"" << bestPattern->getDebugName() + << "\""; const auto *pattern = static_cast<const RewritePattern *>(bestPattern); @@ -223,9 +221,8 @@ LogicalResult PatternApplicator::matchAndRewrite( [&] { rewriter.setListener(oldListener); }); #endif result = pattern->matchAndRewrite(op, rewriter); - LLVM_DEBUG(llvm::dbgs() - << "\"" << bestPattern->getDebugName() << "\" result " - << succeeded(result) << "\n"); + LDBG() << " -> matchAndRewrite " + << (succeeded(result) ? "successful" : "failed"); } // Process the result of the pattern application. diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index dcd2e11..8e83e45 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -333,7 +333,8 @@ private: /// Determine whether expression \p op should be emitted in a deferred way. static bool hasDeferredEmission(Operation *op) { return isa_and_nonnull<emitc::GetGlobalOp, emitc::LiteralOp, emitc::MemberOp, - emitc::MemberOfPtrOp, emitc::SubscriptOp>(op); + emitc::MemberOfPtrOp, emitc::SubscriptOp, + emitc::GetFieldOp>(op); } /// Determine whether expression \p expressionOp should be emitted inline, i.e. 
@@ -1049,25 +1050,17 @@ static LogicalResult printOperation(CppEmitter &emitter, ClassOp classOp) { static LogicalResult printOperation(CppEmitter &emitter, FieldOp fieldOp) { raw_ostream &os = emitter.ostream(); - if (failed(emitter.emitType(fieldOp->getLoc(), fieldOp.getType()))) + if (failed(emitter.emitVariableDeclaration( + fieldOp->getLoc(), fieldOp.getType(), fieldOp.getSymName()))) return failure(); - os << " " << fieldOp.getSymName() << ";"; - return success(); -} - -static LogicalResult printOperation(CppEmitter &emitter, - GetFieldOp getFieldOp) { - raw_indented_ostream &os = emitter.ostream(); - - Value result = getFieldOp.getResult(); - if (failed(emitter.emitType(getFieldOp->getLoc(), result.getType()))) - return failure(); - os << " "; - if (failed(emitter.emitOperand(result))) - return failure(); - os << " = "; + std::optional<Attribute> initialValue = fieldOp.getInitialValue(); + if (initialValue) { + os << " = "; + if (failed(emitter.emitAttribute(fieldOp->getLoc(), *initialValue))) + return failure(); + } - os << getFieldOp.getFieldName().str(); + os << ";"; return success(); } @@ -1204,7 +1197,7 @@ static LogicalResult printOperation(CppEmitter &emitter, os << ") {\n"; if (failed(printFunctionBody(emitter, operation, functionOp.getBlocks()))) return failure(); - os << "}\n"; + os << "}"; return success(); } @@ -1245,7 +1238,7 @@ static LogicalResult printOperation(CppEmitter &emitter, os << ") {\n"; if (failed(printFunctionBody(emitter, operation, functionOp.getBlocks()))) return failure(); - os << "}\n"; + os << "}"; return success(); } @@ -1700,12 +1693,11 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { emitc::CmpOp, emitc::ConditionalOp, emitc::ConstantOp, emitc::DeclareFuncOp, emitc::DivOp, emitc::ExpressionOp, emitc::FieldOp, emitc::FileOp, emitc::ForOp, emitc::FuncOp, - emitc::GetFieldOp, emitc::GlobalOp, emitc::IfOp, - emitc::IncludeOp, emitc::LoadOp, emitc::LogicalAndOp, - emitc::LogicalNotOp, 
emitc::LogicalOrOp, emitc::MulOp, - emitc::RemOp, emitc::ReturnOp, emitc::SubOp, emitc::SwitchOp, - emitc::UnaryMinusOp, emitc::UnaryPlusOp, emitc::VariableOp, - emitc::VerbatimOp>( + emitc::GlobalOp, emitc::IfOp, emitc::IncludeOp, emitc::LoadOp, + emitc::LogicalAndOp, emitc::LogicalNotOp, emitc::LogicalOrOp, + emitc::MulOp, emitc::RemOp, emitc::ReturnOp, emitc::SubOp, + emitc::SwitchOp, emitc::UnaryMinusOp, emitc::UnaryPlusOp, + emitc::VariableOp, emitc::VerbatimOp>( [&](auto op) { return printOperation(*this, op); }) // Func ops. @@ -1715,6 +1707,10 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { cacheDeferredOpResult(op.getResult(), op.getName()); return success(); }) + .Case<emitc::GetFieldOp>([&](auto op) { + cacheDeferredOpResult(op.getResult(), op.getFieldName()); + return success(); + }) .Case<emitc::LiteralOp>([&](auto op) { cacheDeferredOpResult(op.getResult(), op.getValue()); return success(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index b3577c6..90462d1 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -164,6 +164,42 @@ static llvm::Intrinsic::ID getLdMatrixIntrinsicId(NVVM::MMALayout layout, } } +/// Return the intrinsic ID associated with stmatrix for the given paramters. +static llvm::Intrinsic::ID +getStMatrixIntrinsicId(NVVM::MMALayout layout, int32_t num, + NVVM::LdStMatrixShapeAttr shape, + NVVM::LdStMatrixEltType eltType) { + if (shape.getM() == 8 && shape.getN() == 8) { + switch (num) { + case 1: + return (layout == NVVM::MMALayout::row) + ? llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x1_b16 + : llvm::Intrinsic:: + nvvm_stmatrix_sync_aligned_m8n8_x1_trans_b16; + case 2: + return (layout == NVVM::MMALayout::row) + ? 
llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x2_b16 + : llvm::Intrinsic:: + nvvm_stmatrix_sync_aligned_m8n8_x2_trans_b16; + case 4: + return (layout == NVVM::MMALayout::row) + ? llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m8n8_x4_b16 + : llvm::Intrinsic:: + nvvm_stmatrix_sync_aligned_m8n8_x4_trans_b16; + } + } else if (shape.getM() == 16 && shape.getN() == 8) { + switch (num) { + case 1: + return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x1_trans_b8; + case 2: + return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x2_trans_b8; + case 4: + return llvm::Intrinsic::nvvm_stmatrix_sync_aligned_m16n8_x4_trans_b8; + } + } + llvm_unreachable("unknown stmatrix kind"); +} + /// Return the intrinsic ID associated with st.bulk for the given address type. static llvm::Intrinsic::ID getStBulkIntrinsicId(LLVM::LLVMPointerType addrType) { diff --git a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp index 88931b5..d0ae513 100644 --- a/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp +++ b/mlir/lib/Target/SPIRV/Deserialization/Deserializer.cpp @@ -1779,7 +1779,7 @@ LogicalResult spirv::Deserializer::processConstantNull(ArrayRef<uint32_t> operands) { if (operands.size() != 2) { return emitError(unknownLoc, - "OpConstantNull must have type <id> and result <id>"); + "OpConstantNull must only have type <id> and result <id>"); } Type resultType = getType(operands[0]); @@ -1789,8 +1789,15 @@ spirv::Deserializer::processConstantNull(ArrayRef<uint32_t> operands) { } auto resultID = operands[1]; + Attribute attr; if (resultType.isIntOrFloat() || isa<VectorType>(resultType)) { - auto attr = opBuilder.getZeroAttr(resultType); + attr = opBuilder.getZeroAttr(resultType); + } else if (auto tensorType = dyn_cast<TensorArmType>(resultType)) { + if (auto element = opBuilder.getZeroAttr(tensorType.getElementType())) + attr = DenseElementsAttr::get(tensorType, element); + } + + if (attr) { // For normal constants, we 
just record the attribute (and its type) for // later materialization at use sites. constantMap.try_emplace(resultID, attr, resultType); diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp index 737f296..3053663 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -69,6 +69,25 @@ static Block *getPhiIncomingBlock(Block *block) { return block; } +static bool isZeroValue(Attribute attr) { + if (auto floatAttr = dyn_cast<FloatAttr>(attr)) { + return floatAttr.getValue().isZero(); + } + if (auto boolAttr = dyn_cast<BoolAttr>(attr)) { + return !boolAttr.getValue(); + } + if (auto intAttr = dyn_cast<IntegerAttr>(attr)) { + return intAttr.getValue().isZero(); + } + if (auto splatElemAttr = dyn_cast<SplatElementsAttr>(attr)) { + return isZeroValue(splatElemAttr.getSplatValue<Attribute>()); + } + if (auto denseElemAttr = dyn_cast<DenseElementsAttr>(attr)) { + return all_of(denseElemAttr.getValues<Attribute>(), isZeroValue); + } + return false; +} + namespace mlir { namespace spirv { @@ -959,6 +978,11 @@ Serializer::prepareDenseElementsConstant(Location loc, Type constType, return 0; } } else if (isa<spirv::TensorArmType>(constType)) { + if (isZeroValue(valueAttr)) { + encodeInstructionInto(typesGlobalValues, spirv::Opcode::OpConstantNull, + {typeID, resultID}); + return resultID; + } numberOfConstituents = shapedType.getNumElements(); operands.reserve(numberOfConstituents + 2); for (int i = 0; i < numberOfConstituents; ++i) { @@ -1163,6 +1187,21 @@ uint32_t Serializer::prepareConstantFp(Location loc, FloatAttr floatAttr, return resultID; } +// Returns type of attribute. In case of a TypedAttr this will simply return +// the type. But for an ArrayAttr which is untyped and can be multidimensional +// it creates the ArrayType recursively. 
+static Type getValueType(Attribute attr) { + if (auto typedAttr = dyn_cast<TypedAttr>(attr)) { + return typedAttr.getType(); + } + + if (auto arrayAttr = dyn_cast<ArrayAttr>(attr)) { + return spirv::ArrayType::get(getValueType(arrayAttr[0]), arrayAttr.size()); + } + + return nullptr; +} + uint32_t Serializer::prepareConstantCompositeReplicate(Location loc, Type resultType, Attribute valueAttr) { @@ -1176,18 +1215,9 @@ uint32_t Serializer::prepareConstantCompositeReplicate(Location loc, return 0; } - Type valueType; - if (auto typedAttr = dyn_cast<TypedAttr>(valueAttr)) { - valueType = typedAttr.getType(); - } else if (auto arrayAttr = dyn_cast<ArrayAttr>(valueAttr)) { - auto typedElemAttr = dyn_cast<TypedAttr>(arrayAttr[0]); - if (!typedElemAttr) - return 0; - valueType = - spirv::ArrayType::get(typedElemAttr.getType(), arrayAttr.size()); - } else { + Type valueType = getValueType(valueAttr); + if (!valueAttr) return 0; - } auto compositeType = dyn_cast<CompositeType>(resultType); if (!compositeType) @@ -1202,11 +1232,14 @@ uint32_t Serializer::prepareConstantCompositeReplicate(Location loc, } uint32_t resultID = getNextID(); - uint32_t operands[] = {typeID, resultID, constandID}; - - encodeInstructionInto(typesGlobalValues, - spirv::Opcode::OpConstantCompositeReplicateEXT, - operands); + if (dyn_cast<spirv::TensorArmType>(resultType) && isZeroValue(valueAttr)) { + encodeInstructionInto(typesGlobalValues, spirv::Opcode::OpConstantNull, + {typeID, resultID}); + } else { + encodeInstructionInto(typesGlobalValues, + spirv::Opcode::OpConstantCompositeReplicateEXT, + {typeID, resultID, constandID}); + } constCompositeReplicateIDMap[valueTypePair] = resultID; return resultID; diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index bdcdaa4..de714d8b 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -501,8 +501,7 @@ performActions(raw_ostream &os, << "bytecode version while 
not emitting bytecode"; AsmState asmState(op.get(), OpPrintingFlags(), /*locationMap=*/nullptr, &fallbackResourceMap); - op.get()->print(os, asmState); - os << '\n'; + os << OpWithState(op.get(), asmState) << '\n'; return success(); } diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 08803e0..f23c619 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -20,6 +20,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLog.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/SaveAndRestore.h" #include "llvm/Support/ScopedPrinter.h" @@ -1129,8 +1130,13 @@ struct ConversionPatternRewriterImpl : public RewriterBase::Listener { /// verification. SmallPtrSet<Operation *, 1> pendingRootUpdates; + /// A raw output stream used to prefix the debug log. + llvm::impl::raw_ldbg_ostream os{(Twine("[") + DEBUG_TYPE + "] ").str(), + llvm::dbgs(), /*HasPendingNewline=*/false}; + /// A logger used to emit diagnostics during the conversion process. 
- llvm::ScopedPrinter logger{llvm::dbgs()}; + llvm::ScopedPrinter logger{os}; + std::string logPrefix; #endif }; } // namespace detail diff --git a/mlir/test/Conversion/GPUToSPIRV/lookup-target-env.mlir b/mlir/test/Conversion/GPUToSPIRV/lookup-target-env.mlir new file mode 100644 index 0000000..983747b --- /dev/null +++ b/mlir/test/Conversion/GPUToSPIRV/lookup-target-env.mlir @@ -0,0 +1,40 @@ +// RUN: mlir-opt --split-input-file --convert-gpu-to-spirv %s | FileCheck %s + +module attributes {gpu.container_module} { + // CHECK-LABEL: spirv.module @{{.*}} GLSL450 + gpu.module @kernels [#spirv.target_env<#spirv.vce<v1.0, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>] { + // CHECK: spirv.func @load_kernel + // CHECK-SAME: %[[ARG:.*]]: !spirv.ptr<!spirv.struct<(!spirv.array<48 x f32, stride=4> [0])>, StorageBuffer> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}) + gpu.func @load_kernel(%arg0: memref<12x4xf32>) kernel attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} { + %c0 = arith.constant 0 : index + // CHECK: %[[PTR:.*]] = spirv.AccessChain %[[ARG]]{{\[}}{{%.*}}, {{%.*}}{{\]}} + // CHECK-NEXT: {{%.*}} = spirv.Load "StorageBuffer" %[[PTR]] : f32 + %0 = memref.load %arg0[%c0, %c0] : memref<12x4xf32> + // CHECK: spirv.Return + gpu.return + } + } +} + +// ----- +// Checks that the `-convert-gpu-to-spirv` pass selects the first +// `spirv.target_env` from the `targets` array attribute attached to `gpu.module`. 
+module attributes {gpu.container_module} { + // CHECK-LABEL: spirv.module @{{.*}} GLSL450 + // CHECK-SAME: #spirv.target_env<#spirv.vce<v1.4, [Shader], [SPV_KHR_storage_buffer_storage_class]> + gpu.module @kernels [ + #spirv.target_env<#spirv.vce<v1.4, [Shader], [SPV_KHR_storage_buffer_storage_class]>, #spirv.resource_limits<>>, + #spirv.target_env<#spirv.vce<v1.0, [Kernel], []>, #spirv.resource_limits<>>, + #spirv.target_env<#spirv.vce<v1.0, [Shader], []>, #spirv.resource_limits<>>] { + // CHECK: spirv.func @load_kernel + // CHECK-SAME: %[[ARG:.*]]: !spirv.ptr<!spirv.struct<(!spirv.array<48 x f32, stride=4> [0])>, StorageBuffer> {spirv.interface_var_abi = #spirv.interface_var_abi<(0, 0)>}) + gpu.func @load_kernel(%arg0: memref<12x4xf32>) kernel attributes {spirv.entry_point_abi = #spirv.entry_point_abi<workgroup_size = [16, 1, 1]>} { + %c0 = arith.constant 0 : index + // CHECK: %[[PTR:.*]] = spirv.AccessChain %[[ARG]]{{\[}}{{%.*}}, {{%.*}}{{\]}} + // CHECK-NEXT: {{%.*}} = spirv.Load "StorageBuffer" %[[PTR]] : f32 + %0 = memref.load %arg0[%c0, %c0] : memref<12x4xf32> + // CHECK: spirv.Return + gpu.return + } + } +} diff --git a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir index 8d720ce..580b09d 100644 --- a/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir +++ b/mlir/test/Conversion/NVVMToLLVM/nvvm-to-llvm.mlir @@ -580,30 +580,6 @@ func.func @elect_one_leader_sync() { // ----- -// CHECK-LABEL: @stmatrix( -// CHECK-SAME: %[[arg0:[a-zA-Z0-9_]+]]: !llvm.ptr<3>, -// CHECK-SAME: %[[arg1:[a-zA-Z0-9_]+]]: i32, -// CHECK-SAME: %[[arg2:[a-zA-Z0-9_]+]]: i32, -// CHECK-SAME: %[[arg3:[a-zA-Z0-9_]+]]: i32, -// CHECK-SAME: %[[arg4:[a-zA-Z0-9_]+]]: i32) -llvm.func @stmatrix(%arg0 : !llvm.ptr<3>, %m1 : i32, %m2 : i32, %m3 : i32, %m4 : i32) { -// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x1.m8n8.shared.b16 [$0], {$1};", "r,r" %[[arg0]], %[[arg1]] : (!llvm.ptr<3>, i32) -> () -// CHECK: 
llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x2.m8n8.shared.b16 [$0], {$1, $2};", "r,r,r" %[[arg0]], %[[arg1]], %[[arg2]] : (!llvm.ptr<3>, i32, i32) -> () -// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x4.m8n8.shared.b16 [$0], {$1, $2, $3, $4};", "r,r,r,r,r" %[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]], %[[arg4]] : (!llvm.ptr<3>, i32, i32, i32, i32) -> () -// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x1.trans.m8n8.shared.b16 [$0], {$1};", "r,r" %[[arg0]], %[[arg1]] : (!llvm.ptr<3>, i32) -> () -// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x2.trans.m8n8.shared.b16 [$0], {$1, $2};", "r,r,r" %[[arg0]], %[[arg1]], %[[arg2]] : (!llvm.ptr<3>, i32, i32) -> () -// CHECK: llvm.inline_asm has_side_effects asm_dialect = att "stmatrix.sync.aligned.x4.trans.m8n8.shared.b16 [$0], {$1, $2, $3, $4};", "r,r,r,r,r" %[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]], %[[arg4]] : (!llvm.ptr<3>, i32, i32, i32, i32) -> () - nvvm.stmatrix %arg0, %m1 {layout = #nvvm.mma_layout<row>} : !llvm.ptr<3>, i32 - nvvm.stmatrix %arg0, %m1, %m2 {layout = #nvvm.mma_layout<row>} : !llvm.ptr<3>, i32, i32 - nvvm.stmatrix %arg0, %m1, %m2, %m3, %m4 {layout = #nvvm.mma_layout<row>} : !llvm.ptr<3>, i32, i32, i32, i32 - nvvm.stmatrix %arg0, %m1 {layout = #nvvm.mma_layout<col>} : !llvm.ptr<3>, i32 - nvvm.stmatrix %arg0, %m1, %m2 {layout = #nvvm.mma_layout<col>} : !llvm.ptr<3>, i32, i32 - nvvm.stmatrix %arg0, %m1, %m2, %m3, %m4 {layout = #nvvm.mma_layout<col>} : !llvm.ptr<3>, i32, i32, i32, i32 - llvm.return -} - -// ----- - // CHECK-LABEL: @init_mbarrier_arrive_expect_tx llvm.func @init_mbarrier_arrive_expect_tx(%desc : !llvm.ptr, %pred : i1) { //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "prefetch.tensormap [$0];", "l" diff --git a/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir index 
e6fdb7a..ef0fa08 100644 --- a/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir +++ b/mlir/test/Conversion/SCFToControlFlow/convert-to-cfg.mlir @@ -708,4 +708,45 @@ func.func @simple_std_for_loops_annotation(%arg0 : index, %arg1 : index, %arg2 : } {llvm.loop_annotation = #full_unroll} } {llvm.loop_annotation = #no_unroll} return -}
\ No newline at end of file +} + +// ----- + +// CHECK: #[[LOOP_UNROLL_DISABLE:.*]] = #llvm.loop_unroll<disable = true> +// CHECK: #[[NO_UNROLL:.*]] = #llvm.loop_annotation<unroll = #[[LOOP_UNROLL_DISABLE]]> +// CHECK: func @simple_while_loops_annotation +// CHECK: cf.br +// CHECK: cf.cond_br {{.*}} {llvm.loop_annotation = #[[NO_UNROLL]]} +// CHECK: return +#no_unroll = #llvm.loop_annotation<unroll = <disable = true>> +func.func @simple_while_loops_annotation(%arg0 : i1) { + scf.while : () -> () { + scf.condition(%arg0) + } do { + scf.yield + } attributes {llvm.loop_annotation = #no_unroll} + return +} + +// ----- + +// CHECK: #[[LOOP_UNROLL_DISABLE:.*]] = #llvm.loop_unroll<disable = true> +// CHECK: #[[NO_UNROLL:.*]] = #llvm.loop_annotation<unroll = #[[LOOP_UNROLL_DISABLE]]> +// CHECK: func @do_while_loops_annotation +// CHECK: cf.br +// CHECK: cf.cond_br +// CHECK: cf.br {{.*}} {llvm.loop_annotation = #[[NO_UNROLL]]} +// CHECK: return +#no_unroll = #llvm.loop_annotation<unroll = <disable = true>> +func.func @do_while_loops_annotation() { + %c0_i32 = arith.constant 0 : i32 + scf.while (%arg2 = %c0_i32) : (i32) -> (i32) { + %0 = "test.make_condition"() : () -> i1 + scf.condition(%0) %c0_i32 : i32 + } do { + ^bb0(%arg2: i32): + scf.yield %c0_i32: i32 + } attributes {llvm.loop_annotation = #no_unroll} + return +} + diff --git a/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir b/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir index 3adafc1..c703274 100644 --- a/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/arithmetic-ops.mlir @@ -13,7 +13,7 @@ func.func @fadd_scalar(%arg: f32) -> f32 { // ----- func.func @fadd_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FAdd %arg, %arg : bf16 return %0 : bf16 } @@ -33,7 +33,7 @@ 
func.func @fdiv_scalar(%arg: f32) -> f32 { // ----- func.func @fdiv_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FDiv %arg, %arg : bf16 return %0 : bf16 } @@ -53,7 +53,7 @@ func.func @fmod_scalar(%arg: f32) -> f32 { // ----- func.func @fmod_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FMod %arg, %arg : bf16 return %0 : bf16 } @@ -79,7 +79,7 @@ func.func @fmul_vector(%arg: vector<4xf32>) -> vector<4xf32> { // ----- func.func @fmul_i32(%arg: i32) -> i32 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FMul %arg, %arg : i32 return %0 : i32 } @@ -87,7 +87,7 @@ func.func @fmul_i32(%arg: i32) -> i32 { // ----- func.func @fmul_bf16(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FMul %arg, %arg : bf16 return %0 : bf16 } @@ -95,7 +95,7 @@ func.func @fmul_bf16(%arg: bf16) -> bf16 { // ----- func.func @fmul_bf16_vector(%arg: vector<4xbf16>) -> vector<4xbf16> { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FMul %arg, %arg : vector<4xbf16> return %0 : vector<4xbf16> } 
@@ -103,7 +103,7 @@ func.func @fmul_bf16_vector(%arg: vector<4xbf16>) -> vector<4xbf16> { // ----- func.func @fmul_tensor(%arg: tensor<4xf32>) -> tensor<4xf32> { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FMul %arg, %arg : tensor<4xf32> return %0 : tensor<4xf32> } @@ -123,7 +123,7 @@ func.func @fnegate_scalar(%arg: f32) -> f32 { // ----- func.func @fnegate_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FNegate %arg : bf16 return %0 : bf16 } @@ -143,7 +143,7 @@ func.func @frem_scalar(%arg: f32) -> f32 { // ----- func.func @frem_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FRem %arg, %arg : bf16 return %0 : bf16 } @@ -163,7 +163,7 @@ func.func @fsub_scalar(%arg: f32) -> f32 { // ----- func.func @fsub_bf16_scalar(%arg: bf16) -> bf16 { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.FSub %arg, %arg : bf16 return %0 : bf16 } @@ -348,7 +348,7 @@ func.func @dot(%arg0: vector<4xf32>, %arg1: vector<4xf32>) -> f16 { // ----- func.func @dot(%arg0: vector<4xi32>, %arg1: vector<4xi32>) -> i32 { - // expected-error @+1 {{'spirv.Dot' op operand #0 must be vector of 16/32/64-bit float or BFloat16 values of length 2/3/4/8/16}} + // expected-error @+1 {{'spirv.Dot' op operand 
#0 must be fixed-length vector of 16/32/64-bit float or BFloat16 values of length 2/3/4/8/16}} %0 = spirv.Dot %arg0, %arg1 : vector<4xi32> -> i32 return %0 : i32 } @@ -558,7 +558,7 @@ func.func @vector_times_scalar(%vector: vector<4xf32>, %scalar: f32) -> vector<3 // ----- func.func @vector_bf16_times_scalar_bf16(%vector: vector<4xbf16>, %scalar: bf16) -> vector<4xbf16> { - // expected-error @+1 {{op operand #0 must be vector of 16/32/64-bit float values of length 2/3/4}} + // expected-error @+1 {{operand #0 must be vector of 16/32/64-bit float values of length 2/3/4}} %0 = spirv.VectorTimesScalar %vector, %scalar : (vector<4xbf16>, bf16) -> vector<4xbf16> return %0 : vector<4xbf16> } diff --git a/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir b/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir index f3f0ebf..4bdac19 100644 --- a/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/bit-ops.mlir @@ -137,7 +137,7 @@ func.func @bitwise_or_all_ones_vector(%arg: vector<3xi8>) -> vector<3xi8> { // ----- func.func @bitwise_or_float(%arg0: f16, %arg1: f16) -> f16 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4}} %0 = spirv.BitwiseOr %arg0, %arg1 : f16 return %0 : f16 } @@ -165,7 +165,7 @@ func.func @bitwise_xor_vector(%arg: vector<4xi32>) -> vector<4xi32> { // ----- func.func @bitwise_xor_float(%arg0: f16, %arg1: f16) -> f16 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4}} %0 = spirv.BitwiseXor %arg0, %arg1 : f16 return %0 : f16 } @@ -274,7 +274,7 @@ func.func @bitwise_and_zext_vector(%arg: vector<2xi8>) -> vector<2xi32> { // ----- 
func.func @bitwise_and_float(%arg0: f16, %arg1: f16) -> f16 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4}} %0 = spirv.BitwiseAnd %arg0, %arg1 : f16 return %0 : f16 } diff --git a/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir b/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir index 5c5d94c..fd8a2ff 100644 --- a/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/gl-ops.mlir @@ -19,7 +19,7 @@ func.func @expvec(%arg0 : vector<3xf16>) -> () { // ----- func.func @exp(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or fixed-length vector of 16/32-bit float values}} %2 = spirv.GL.Exp %arg0 : i32 return } @@ -27,7 +27,7 @@ func.func @exp(%arg0 : i32) -> () { // ----- func.func @exp(%arg0 : vector<5xf32>) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values of length 2/3/4}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or fixed-length vector of 16/32-bit float values of length 2/3/4}} %2 = spirv.GL.Exp %arg0 : vector<5xf32> return } @@ -51,7 +51,7 @@ func.func @exp(%arg0 : i32) -> () { // ----- func.func @exp_bf16(%arg0 : bf16) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values of length 2/3/4}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or fixed-length vector of 16/32-bit float values of length 2/3/4}} %2 = spirv.GL.Exp %arg0 : bf16 return } @@ -101,7 +101,7 @@ func.func @iminmax(%arg0: i32, %arg1: i32) { // ----- func.func @fmaxminbf16vec(%arg0 : vector<3xbf16>, %arg1 : vector<3xbf16>) { - // expected-error @+1 {{operand #0 must be 16/32/64-bit float or vector 
of 16/32/64-bit float values}} + // expected-error @+1 {{operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %1 = spirv.GL.FMax %arg0, %arg1 : vector<3xbf16> %2 = spirv.GL.FMin %arg0, %arg1 : vector<3xbf16> return @@ -499,7 +499,7 @@ func.func @frexp_struct_mismatch_type(%arg0 : f32) -> () { // ----- func.func @frexp_struct_wrong_type(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %2 = spirv.GL.FrexpStruct %arg0 : i32 -> !spirv.struct<(i32, i32)> return } @@ -614,7 +614,7 @@ func.func @findimsb_vector_i64(%arg0 : vector<3xi64>) -> () { // ----- func.func @findimsb_error_scalar_float(%arg0 : f32) -> () { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/1}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/1}} %2 = spirv.GL.FindILsb %arg0 : f32 return } @@ -640,7 +640,7 @@ func.func @findsmsb_vector(%arg0 : vector<3xi32>) -> () { // ----- func.func @findsmsb_error_scalar_i64(%arg0 : i64) -> () { - // expected-error @+1 {{operand #0 must be Int32 or vector of Int32}} + // expected-error @+1 {{operand #0 must be Int32 or fixed-length vector of Int32}} %2 = spirv.GL.FindSMsb %arg0 : i64 return } @@ -666,7 +666,7 @@ func.func @findumsb_vector(%arg0 : vector<3xi32>) -> () { // ----- func.func @findumsb(%arg0 : i64) -> () { - // expected-error @+1 {{operand #0 must be Int32 or vector of Int32}} + // expected-error @+1 {{operand #0 must be Int32 or fixed-length vector of Int32}} %2 = spirv.GL.FindUMsb %arg0 : i64 return } @@ -692,7 +692,7 @@ func.func @distance_vector(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { // ----- func.func @distance_invalid_type(%arg0 : 
i32, %arg1 : i32) { - // expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + // expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16}} %0 = spirv.GL.Distance %arg0, %arg1 : i32, i32 -> f32 return } @@ -708,7 +708,7 @@ func.func @distance_arg_mismatch(%arg0 : vector<3xf32>, %arg1 : vector<4xf32>) { // ----- func.func @distance_invalid_vector_size(%arg0 : vector<5xf32>, %arg1 : vector<5xf32>) { - // expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + // expected-error @+1 {{'spirv.GL.Distance' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16}} %0 = spirv.GL.Distance %arg0, %arg1 : vector<5xf32>, vector<5xf32> -> f32 return } @@ -736,7 +736,7 @@ func.func @cross(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { // ----- func.func @cross_invalid_type(%arg0 : vector<3xi32>, %arg1 : vector<3xi32>) { - // expected-error @+1 {{'spirv.GL.Cross' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'vector<3xi32>'}} + // expected-error @+1 {{'spirv.GL.Cross' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'vector<3xi32>'}} %0 = spirv.GL.Cross %arg0, %arg1 : vector<3xi32> return } @@ -762,7 +762,7 @@ func.func @normalize_vector(%arg0 : vector<3xf32>) { // ----- func.func @normalize_invalid_type(%arg0 : i32) { - // expected-error @+1 {{'spirv.GL.Normalize' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{'spirv.GL.Normalize' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.GL.Normalize 
%arg0 : i32 return } @@ -788,7 +788,7 @@ func.func @reflect_vector(%arg0 : vector<3xf32>, %arg1 : vector<3xf32>) { // ----- func.func @reflect_invalid_type(%arg0 : i32, %arg1 : i32) { - // expected-error @+1 {{'spirv.GL.Reflect' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{'spirv.GL.Reflect' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.GL.Reflect %arg0, %arg1 : i32 return } @@ -814,7 +814,7 @@ func.func @fractvec(%arg0 : vector<3xf16>) -> () { // ----- func.func @fract_invalid_type(%arg0 : i32) { - // expected-error @+1 {{'spirv.GL.Fract' op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{'spirv.GL.Fract' op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %0 = spirv.GL.Fract %arg0 : i32 return } @@ -840,7 +840,7 @@ func.func @log2vec(%arg0 : vector<3xf16>) -> () { // ----- func.func @log2_invalid_type(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or fixed-length vector of 16/32-bit float values}} %0 = spirv.GL.Log2 %arg0 : i32 return } @@ -866,7 +866,7 @@ func.func @tanhvec(%arg0 : vector<3xf16>) -> () { // ----- func.func @tanh_invalid_type(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or fixed-length vector of 16/32-bit float values}} %0 = spirv.GL.Tanh %arg0 : i32 return } @@ -892,7 +892,7 @@ func.func @exp2vec(%arg0 : vector<3xf16>) -> () { // ----- func.func @exp2_invalid_type(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32-bit float or vector of 16/32-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32-bit float or 
fixed-length vector of 16/32-bit float values}} %0 = spirv.GL.Exp2 %arg0 : i32 return } @@ -1022,7 +1022,7 @@ func.func @lengthvec(%arg0 : vector<3xf32>) -> () { // ----- func.func @length_i32_in(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'i32'}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'i32'}} %0 = spirv.GL.Length %arg0 : i32 -> f32 return } @@ -1038,7 +1038,7 @@ func.func @length_f16_in(%arg0 : f16) -> () { // ----- func.func @length_i32vec_in(%arg0 : vector<3xi32>) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'vector<3xi32>'}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'vector<3xi32>'}} %0 = spirv.GL.Length %arg0 : vector<3xi32> -> f32 return } diff --git a/mlir/test/Dialect/SPIRV/IR/group-ops.mlir b/mlir/test/Dialect/SPIRV/IR/group-ops.mlir index d9957ad8..d7a4a6d 100644 --- a/mlir/test/Dialect/SPIRV/IR/group-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/group-ops.mlir @@ -49,7 +49,7 @@ func.func @group_broadcast_negative_scope(%value: f32, %localid: vector<3xi32> ) // ----- func.func @group_broadcast_negative_locid_dtype(%value: f32, %localid: vector<3xf32> ) -> f32 { - // expected-error @+1 {{operand #1 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values}} + // expected-error @+1 {{op operand #1 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values}} %0 = spirv.GroupBroadcast <Subgroup> %value, %localid : f32, vector<3xf32> return %0: f32 } diff --git a/mlir/test/Dialect/SPIRV/IR/image-ops.mlir b/mlir/test/Dialect/SPIRV/IR/image-ops.mlir index d3aaef7..320a8fa 100644 --- 
a/mlir/test/Dialect/SPIRV/IR/image-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/image-ops.mlir @@ -349,7 +349,7 @@ func.func @image_fetch_2d_result(%arg0: !spirv.image<f32, Dim2D, NoDepth, NonArr // ----- func.func @image_fetch_float_coords(%arg0: !spirv.image<f32, Dim2D, NoDepth, NonArrayed, SingleSampled, NeedSampler, Rgba8>, %arg1: vector<2xf32>) -> () { - // expected-error @+1 {{op operand #1 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'vector<2xf32>'}} + // expected-error @+1 {{op operand #1 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'vector<2xf32>'}} %0 = spirv.ImageFetch %arg0, %arg1 : !spirv.image<f32, Dim2D, NoDepth, NonArrayed, SingleSampled, NeedSampler, Rgba8>, vector<2xf32> -> vector<2xf32> spirv.Return } diff --git a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir index bb15d01..2e2fb1a 100644 --- a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir @@ -21,7 +21,7 @@ spirv.func @f32_to_bf16_vec(%arg0 : vector<2xf32>) "None" { // ----- spirv.func @f32_to_bf16_unsupported(%arg0 : f64) "None" { - // expected-error @+1 {{operand #0 must be Float32 or vector of Float32 values of length 2/3/4/8/16, but got}} + // expected-error @+1 {{operand #0 must be Float32 or fixed-length vector of Float32 values of length 2/3/4/8/16, but got}} %0 = spirv.INTEL.ConvertFToBF16 %arg0 : f64 to i16 spirv.Return } @@ -29,7 +29,7 @@ spirv.func @f32_to_bf16_unsupported(%arg0 : f64) "None" { // ----- spirv.func @f32_to_bf16_vec_unsupported(%arg0 : vector<2xf32>) "None" { - // expected-error @+1 {{operand and result must have same number of elements}} + // expected-error @+1 {{op requires the same shape for all operands and results}} %0 = spirv.INTEL.ConvertFToBF16 %arg0 : vector<2xf32> to vector<4xi16> spirv.Return } @@ -57,7 +57,7 @@ spirv.func 
@bf16_to_f32_vec(%arg0 : vector<2xi16>) "None" { // ----- spirv.func @bf16_to_f32_unsupported(%arg0 : i16) "None" { - // expected-error @+1 {{result #0 must be Float32 or vector of Float32 values of length 2/3/4/8/16, but got}} + // expected-error @+1 {{result #0 must be Float32 or fixed-length vector of Float32 values of length 2/3/4/8/16, but got}} %0 = spirv.INTEL.ConvertBF16ToF %arg0 : i16 to f16 spirv.Return } @@ -65,7 +65,7 @@ spirv.func @bf16_to_f32_unsupported(%arg0 : i16) "None" { // ----- spirv.func @bf16_to_f32_vec_unsupported(%arg0 : vector<2xi16>) "None" { - // expected-error @+1 {{operand and result must have same number of elements}} + // expected-error @+1 {{op requires the same shape for all operands and results}} %0 = spirv.INTEL.ConvertBF16ToF %arg0 : vector<2xi16> to vector<3xf32> spirv.Return } @@ -73,6 +73,42 @@ spirv.func @bf16_to_f32_vec_unsupported(%arg0 : vector<2xi16>) "None" { // ----- //===----------------------------------------------------------------------===// +// spirv.INTEL.RoundFToTF32 +//===----------------------------------------------------------------------===// + +spirv.func @f32_to_tf32(%arg0 : f32) "None" { + // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : f32 to f32 + %0 = spirv.INTEL.RoundFToTF32 %arg0 : f32 to f32 + spirv.Return +} + +// ----- + +spirv.func @f32_to_tf32_vec(%arg0 : vector<2xf32>) "None" { + // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : vector<2xf32> to vector<2xf32> + %0 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<2xf32> + spirv.Return +} + +// ----- + +spirv.func @f32_to_tf32_unsupported(%arg0 : f64) "None" { + // expected-error @+1 {{op operand #0 must be Float32 or fixed-length vector of Float32 values of length 2/3/4/8/16, but got 'f64'}} + %0 = spirv.INTEL.RoundFToTF32 %arg0 : f64 to f32 + spirv.Return +} + +// ----- + +spirv.func @f32_to_tf32_vec_unsupported(%arg0 : vector<2xf32>) "None" { + // expected-error @+1 {{op requires the same shape for all operands and 
results}} + %0 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<4xf32> + spirv.Return +} + +// ----- + +//===----------------------------------------------------------------------===// // spirv.INTEL.SplitBarrier //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SPIRV/IR/khr-cooperative-matrix-ops.mlir b/mlir/test/Dialect/SPIRV/IR/khr-cooperative-matrix-ops.mlir index 61a35b7..491c7a7 100644 --- a/mlir/test/Dialect/SPIRV/IR/khr-cooperative-matrix-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/khr-cooperative-matrix-ops.mlir @@ -583,7 +583,7 @@ spirv.func @matrix_times_scalar(%a: !spirv.coopmatrix<2x2xf32, Workgroup, Matrix // These binary arithmetic instructions do not support coop matrix operands. spirv.func @fmod(%a: !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA>, %b: !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA>) "None" { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16}} %p = spirv.FMod %a, %b : !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA> spirv.Return } @@ -591,14 +591,14 @@ spirv.func @fmod(%a: !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA>, %b: !spirv.c // ----- spirv.func @frem(%a: !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA>, %b: !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA>) "None" { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16}} %p = spirv.FRem %a, %b : !spirv.coopmatrix<2x2xf32, Subgroup, MatrixA> spirv.Return } // ----- spirv.func @smod(%a: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>, %b: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>) "None" { - // 
expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} %p = spirv.SMod %a, %b : !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA> spirv.Return } @@ -606,7 +606,7 @@ spirv.func @smod(%a: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>, %b: !spirv.c // ----- spirv.func @srem(%a: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>, %b: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>) "None" { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} %p = spirv.SRem %a, %b : !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA> spirv.Return } @@ -614,7 +614,7 @@ spirv.func @srem(%a: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>, %b: !spirv.c // ----- spirv.func @umod(%a: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>, %b: !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA>) "None" { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16}} %p = spirv.UMod %a, %b : !spirv.coopmatrix<2x2xi32, Subgroup, MatrixA> spirv.Return } diff --git a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir index 58b8288..d7f4ed0 100644 --- a/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/logical-ops.mlir @@ -184,7 +184,7 @@ func.func @logicalUnary(%arg0 : i1) func.func @logicalUnary(%arg0 : i32) { - // expected-error @+1 {{'operand' must be bool or vector of bool values of length 2/3/4/8/16, but 
got 'i32'}} + // expected-error @+1 {{'operand' must be bool or fixed-length vector of bool values of length 2/3/4/8/16, but got 'i32'}} %0 = spirv.LogicalNot %arg0 : i32 return } diff --git a/mlir/test/Dialect/SPIRV/IR/non-uniform-ops.mlir b/mlir/test/Dialect/SPIRV/IR/non-uniform-ops.mlir index 7ab94f1..bdb2abd 100644 --- a/mlir/test/Dialect/SPIRV/IR/non-uniform-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/non-uniform-ops.mlir @@ -185,7 +185,7 @@ func.func @group_non_uniform_fmul_clustered_reduce(%val: vector<2xf32>) -> vecto // ----- func.func @group_non_uniform_bf16_fmul_reduce(%val: bf16) -> bf16 { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'bf16'}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'bf16'}} %0 = spirv.GroupNonUniformFMul <Workgroup> <Reduce> %val : bf16 -> bf16 return %0: bf16 } @@ -206,7 +206,7 @@ func.func @group_non_uniform_fmax_reduce(%val: f32) -> f32 { // ----- func.func @group_non_uniform_bf16_fmax_reduce(%val: bf16) -> bf16 { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'bf16'}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4/8/16, but got 'bf16'}} %0 = spirv.GroupNonUniformFMax <Workgroup> <Reduce> %val : bf16 -> bf16 return %0: bf16 } @@ -511,7 +511,7 @@ func.func @group_non_uniform_bitwise_and(%val: i32) -> i32 { // ----- func.func @group_non_uniform_bitwise_and(%val: i1) -> i1 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'i1'}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but 
got 'i1'}} %0 = spirv.GroupNonUniformBitwiseAnd <Workgroup> <Reduce> %val : i1 -> i1 return %0: i1 } @@ -532,7 +532,7 @@ func.func @group_non_uniform_bitwise_or(%val: i32) -> i32 { // ----- func.func @group_non_uniform_bitwise_or(%val: i1) -> i1 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'i1'}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'i1'}} %0 = spirv.GroupNonUniformBitwiseOr <Workgroup> <Reduce> %val : i1 -> i1 return %0: i1 } @@ -553,7 +553,7 @@ func.func @group_non_uniform_bitwise_xor(%val: i32) -> i32 { // ----- func.func @group_non_uniform_bitwise_xor(%val: i1) -> i1 { - // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'i1'}} + // expected-error @+1 {{operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4/8/16, but got 'i1'}} %0 = spirv.GroupNonUniformBitwiseXor <Workgroup> <Reduce> %val : i1 -> i1 return %0: i1 } @@ -574,7 +574,7 @@ func.func @group_non_uniform_logical_and(%val: i1) -> i1 { // ----- func.func @group_non_uniform_logical_and(%val: i32) -> i32 { - // expected-error @+1 {{operand #0 must be bool or vector of bool values of length 2/3/4/8/16, but got 'i32'}} + // expected-error @+1 {{operand #0 must be bool or fixed-length vector of bool values of length 2/3/4/8/16, but got 'i32'}} %0 = spirv.GroupNonUniformLogicalAnd <Workgroup> <Reduce> %val : i32 -> i32 return %0: i32 } @@ -595,7 +595,7 @@ func.func @group_non_uniform_logical_or(%val: i1) -> i1 { // ----- func.func @group_non_uniform_logical_or(%val: i32) -> i32 { - // expected-error @+1 {{operand #0 must be bool or vector of bool values of length 2/3/4/8/16, but got 'i32'}} + // expected-error @+1 {{operand #0 must be bool or 
fixed-length vector of bool values of length 2/3/4/8/16, but got 'i32'}} %0 = spirv.GroupNonUniformLogicalOr <Workgroup> <Reduce> %val : i32 -> i32 return %0: i32 } @@ -616,7 +616,7 @@ func.func @group_non_uniform_logical_xor(%val: i1) -> i1 { // ----- func.func @group_non_uniform_logical_xor(%val: i32) -> i32 { - // expected-error @+1 {{operand #0 must be bool or vector of bool values of length 2/3/4/8/16, but got 'i32'}} + // expected-error @+1 {{operand #0 must be bool or fixed-length vector of bool values of length 2/3/4/8/16, but got 'i32'}} %0 = spirv.GroupNonUniformLogicalXor <Workgroup> <Reduce> %val : i32 -> i32 return %0: i32 } diff --git a/mlir/test/Dialect/SPIRV/IR/ocl-ops.mlir b/mlir/test/Dialect/SPIRV/IR/ocl-ops.mlir index 8f021ed..6aaaa60 100644 --- a/mlir/test/Dialect/SPIRV/IR/ocl-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/ocl-ops.mlir @@ -19,7 +19,7 @@ func.func @expvec(%arg0 : vector<3xf16>) -> () { // ----- func.func @exp(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %2 = spirv.CL.exp %arg0 : i32 return } @@ -27,7 +27,7 @@ func.func @exp(%arg0 : i32) -> () { // ----- func.func @exp(%arg0 : vector<5xf32>) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4}} %2 = spirv.CL.exp %arg0 : vector<5xf32> return } @@ -75,7 +75,7 @@ func.func @fabsf64(%arg0 : f64) -> () { // ----- func.func @fabs(%arg0 : i32) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values}} %2 = 
spirv.CL.fabs %arg0 : i32 return } @@ -83,7 +83,7 @@ func.func @fabs(%arg0 : i32) -> () { // ----- func.func @fabs(%arg0 : vector<5xf32>) -> () { - // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or vector of 16/32/64-bit float values of length 2/3/4}} + // expected-error @+1 {{op operand #0 must be 16/32/64-bit float or fixed-length vector of 16/32/64-bit float values of length 2/3/4}} %2 = spirv.CL.fabs %arg0 : vector<5xf32> return } @@ -137,7 +137,7 @@ func.func @sabsi8(%arg0 : i8) -> () { // ----- func.func @sabs(%arg0 : f32) -> () { - // expected-error @+1 {{op operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values}} + // expected-error @+1 {{op operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values}} %2 = spirv.CL.s_abs %arg0 : f32 return } @@ -145,7 +145,7 @@ func.func @sabs(%arg0 : f32) -> () { // ----- func.func @sabs(%arg0 : vector<5xi32>) -> () { - // expected-error @+1 {{op operand #0 must be 8/16/32/64-bit integer or vector of 8/16/32/64-bit integer values of length 2/3/4}} + // expected-error @+1 {{op operand #0 must be 8/16/32/64-bit integer or fixed-length vector of 8/16/32/64-bit integer values of length 2/3/4}} %2 = spirv.CL.s_abs %arg0 : vector<5xi32> return } diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index 6b55442..5150ee3 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -241,6 +241,26 @@ func.func @clamp_f32_is_noop(%arg0: tensor<4xf32>) -> tensor<4xf32> { // ----- +// CHECK-LABEL: @clamp_boolean_is_noop +func.func @clamp_boolean_is_noop(%arg0: tensor<4xi1>) -> tensor<4xi1> { + // CHECK: return %arg0 + // CHECK-NOT: tosa.clamp + %0 = tosa.clamp %arg0 {min_val = false, max_val = true} : (tensor<4xi1>) -> tensor<4xi1> + return %0 : tensor<4xi1> +} + +// ----- + +// CHECK-LABEL: @clamp_boolean_dynamic_is_noop +func.func 
@clamp_boolean_dynamic_is_noop(%arg0: tensor<?xi1>) -> tensor<?xi1> { + // CHECK: return %arg0 + // CHECK-NOT: tosa.clamp + %0 = tosa.clamp %arg0 {min_val = false, max_val = true} : (tensor<?xi1>) -> tensor<?xi1> + return %0 : tensor<?xi1> +} + +// ----- + // CHECK-LABEL: @clamp_int8_is_noop func.func @clamp_int8_is_noop(%arg0: tensor<4xi8>) -> tensor<4xi8> { // CHECK: return %arg0 diff --git a/mlir/test/Dialect/Tosa/dynamic_extension.mlir b/mlir/test/Dialect/Tosa/dynamic_extension.mlir index 8739f97..e23ce430 100644 --- a/mlir/test/Dialect/Tosa/dynamic_extension.mlir +++ b/mlir/test/Dialect/Tosa/dynamic_extension.mlir @@ -2,7 +2,7 @@ // Check operations when the dynamic extension is enabled. //-------------------------------------------------------- -// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-validate="profile=pro_int,pro_fp extension=dynamic strict-op-spec-alignment allow-invalid-op-datatype-combinations" +// RUN: mlir-opt %s -split-input-file -verify-diagnostics -tosa-validate="profile=pro_int,pro_fp extension=dynamic allow-invalid-op-datatype-combinations" // ----- diff --git a/mlir/test/Dialect/Tosa/level_check.mlir b/mlir/test/Dialect/Tosa/level_check.mlir index bf9ed8a..0184d2b 100644 --- a/mlir/test/Dialect/Tosa/level_check.mlir +++ b/mlir/test/Dialect/Tosa/level_check.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -split-input-file -verify-diagnostics --tosa-validate +// RUN: mlir-opt %s -split-input-file -verify-diagnostics --tosa-validate="extension=dynamic" func.func @test_argmax_rank_invalid(%arg0: tensor<1x1x1x1x29x29x4xf32>) -> tensor<1x1x1x1x29x4xi32> { // expected-error@+1 {{'tosa.argmax' op failed level check: operand rank(shape) <= MAX_RANK}} diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir index 56996b5..f86fb38 100644 --- a/mlir/test/Dialect/Vector/canonicalize.mlir +++ b/mlir/test/Dialect/Vector/canonicalize.mlir @@ -823,11 +823,11 @@ func.func 
@negative_fold_extract_broadcast(%a : vector<1x1xf32>) -> vector<4xf32 // ----- -// CHECK-LABEL: fold_extract_scalar_from_splat +// CHECK-LABEL: fold_extract_splatlike // CHECK-SAME: %[[A:.*]]: f32 // CHECK: return %[[A]] : f32 -func.func @fold_extract_scalar_from_splat(%a : f32, %idx0 : index, %idx1 : index, %idx2 : index) -> f32 { - %b = vector.splat %a : vector<1x2x4xf32> +func.func @fold_extract_splatlike(%a : f32, %idx0 : index, %idx1 : index, %idx2 : index) -> f32 { + %b = vector.broadcast %a : f32 to vector<1x2x4xf32> %r = vector.extract %b[%idx0, %idx1, %idx2] : f32 from vector<1x2x4xf32> return %r : f32 } @@ -2063,11 +2063,11 @@ func.func @insert_strided_slice_full_range(%source: vector<16x16xf16>, %dest: ve // ----- -// CHECK-LABEL: extract_strided_splat -// CHECK: %[[B:.*]] = vector.splat %{{.*}} : vector<2x4xf16> +// CHECK-LABEL: extract_strided_splatlike +// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} f16 to vector<2x4xf16> // CHECK-NEXT: return %[[B]] : vector<2x4xf16> -func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> { - %0 = vector.splat %arg0 : vector<16x4xf16> +func.func @extract_strided_splatlike(%arg0: f16) -> vector<2x4xf16> { + %0 = vector.broadcast %arg0 : f16 to vector<16x4xf16> %1 = vector.extract_strided_slice %0 {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} : vector<16x4xf16> to vector<2x4xf16> @@ -2353,14 +2353,14 @@ func.func @extract_extract_strided2(%A: vector<2x4xf32>) // ----- -// CHECK-LABEL: func @splat_fold -func.func @splat_fold() -> vector<4xf32> { +// CHECK-LABEL: func @splatlike_fold +// CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> +// CHECK-NEXT: return [[V]] : vector<4xf32> +func.func @splatlike_fold() -> vector<4xf32> { %c = arith.constant 1.0 : f32 - %v = vector.splat %c : vector<4xf32> + %v = vector.broadcast %c : f32 to vector<4xf32> return %v : vector<4xf32> - // CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> - // CHECK-NEXT: return [[V]] : 
vector<4xf32> } // ----- @@ -2499,10 +2499,10 @@ func.func @shuffle_nofold1(%v0 : vector<4xi32>, %v1 : vector<2xi32>) -> vector<5 // ----- -// CHECK-LABEL: func @transpose_splat_constant +// CHECK-LABEL: func @transpose_splatlike_constant // CHECK: %[[CST:.+]] = arith.constant dense<5.000000e+00> : vector<8x4xf32> // CHECK: return %[[CST]] -func.func @transpose_splat_constant() -> vector<8x4xf32> { +func.func @transpose_splatlike_constant() -> vector<8x4xf32> { %cst = arith.constant dense<5.0> : vector<4x8xf32> %0 = vector.transpose %cst, [1, 0] : vector<4x8xf32> to vector<8x4xf32> return %0 : vector<8x4xf32> @@ -2510,13 +2510,13 @@ func.func @transpose_splat_constant() -> vector<8x4xf32> { // ----- -// CHECK-LABEL: func @transpose_splat2( -// CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> { -// CHECK: %[[VAL_1:.*]] = vector.splat %[[VAL_0]] : vector<3x4xf32> -// CHECK: return %[[VAL_1]] : vector<3x4xf32> -// CHECK: } -func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> { - %splat = vector.splat %arg : vector<4x3xf32> +// CHECK-LABEL: func @transpose_splatlike2( +// CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> { +// CHECK: %[[VAL_1:.*]] = vector.broadcast %[[VAL_0]] : f32 to vector<3x4xf32> +// CHECK: return %[[VAL_1]] : vector<3x4xf32> +// CHECK: } +func.func @transpose_splatlike2(%arg : f32) -> vector<3x4xf32> { + %splat = vector.broadcast %arg : f32 to vector<4x3xf32> %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32> return %0 : vector<3x4xf32> } @@ -2699,13 +2699,13 @@ func.func @bitcast(%a: vector<4x8xf32>) -> vector<4x16xi16> { // ----- -// CHECK-LABEL: @insert_strided_slice_splat +// CHECK-LABEL: @insert_strided_slice_splatlike // CHECK-SAME: (%[[ARG:.*]]: f32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<8x16xf32> +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : f32 to vector<8x16xf32> // CHECK-NEXT: return %[[SPLAT]] : vector<8x16xf32> -func.func @insert_strided_slice_splat(%x: f32) -> 
(vector<8x16xf32>) { - %splat0 = vector.splat %x : vector<4x4xf32> - %splat1 = vector.splat %x : vector<8x16xf32> +func.func @insert_strided_slice_splatlike(%x: f32) -> (vector<8x16xf32>) { + %splat0 = vector.broadcast %x : f32 to vector<4x4xf32> + %splat1 = vector.broadcast %x : f32 to vector<8x16xf32> %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]} : vector<4x4xf32> into vector<8x16xf32> return %0 : vector<8x16xf32> @@ -2778,13 +2778,13 @@ func.func @insert_strided_2d_constant() -> // ----- -// CHECK-LABEL: func @shuffle_splat +// CHECK-LABEL: func @shuffle_splatlike // CHECK-SAME: (%[[ARG:.*]]: i32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<4xi32> +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<4xi32> // CHECK-NEXT: return %[[SPLAT]] : vector<4xi32> -func.func @shuffle_splat(%x : i32) -> vector<4xi32> { - %v0 = vector.splat %x : vector<4xi32> - %v1 = vector.splat %x : vector<2xi32> +func.func @shuffle_splatlike(%x : i32) -> vector<4xi32> { + %v0 = vector.broadcast %x : i32 to vector<4xi32> + %v1 = vector.broadcast %x : i32 to vector<2xi32> %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32> return %shuffle : vector<4xi32> } @@ -2792,13 +2792,13 @@ func.func @shuffle_splat(%x : i32) -> vector<4xi32> { // ----- -// CHECK-LABEL: func @insert_splat +// CHECK-LABEL: func @insert_splatlike // CHECK-SAME: (%[[ARG:.*]]: i32) -// CHECK-NEXT: %[[SPLAT:.*]] = vector.splat %[[ARG]] : vector<2x4x3xi32> +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<2x4x3xi32> // CHECK-NEXT: return %[[SPLAT]] : vector<2x4x3xi32> -func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> { - %v0 = vector.splat %x : vector<4x3xi32> - %v1 = vector.splat %x : vector<2x4x3xi32> +func.func @insert_splatlike(%x : i32) -> vector<2x4x3xi32> { + %v0 = vector.broadcast %x : i32 to vector<4x3xi32> + %v1 = vector.broadcast %x : i32 to vector<2x4x3xi32> %insert = 
vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32> return %insert : vector<2x4x3xi32> } @@ -3030,11 +3030,11 @@ func.func @rank_1_shuffle_to_interleave(%arg0: vector<6xi32>, %arg1: vector<6xi3 // ----- -// CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression( -// CHECK-SAME: %[[a:.*]]: f32, %[[b:.*]]: vector<f32>, %[[c:.*]]: vector<2xf32>) -func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { - // Splat scalar to 0D and extract scalar. - %0 = vector.splat %a : vector<f32> +// CHECK-LABEL: func @extract_from_0d_splatlike_broadcast_regression( +// CHECK-SAME: %[[A:.*]]: f32, %[[B:.*]]: vector<f32>, %[[C:.*]]: vector<2xf32>) +func.func @extract_from_0d_splatlike_broadcast_regression(%a: f32, %b: vector<f32>, %c: vector<2xf32>) -> (f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { + // Splat/broadcast scalar to 0D and extract scalar. + %0 = vector.broadcast %a : f32 to vector<f32> %1 = vector.extract %0[] : f32 from vector<f32> // Broadcast scalar to 0D and extract scalar. @@ -3042,12 +3042,12 @@ func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %3 = vector.extract %2[] : f32 from vector<f32> // Broadcast 0D to 3D and extract scalar. - // CHECK: %[[extract1:.*]] = vector.extract %[[b]][] : f32 from vector<f32> + // CHECK: %[[EXTRACT1:.*]] = vector.extract %[[B]][] : f32 from vector<f32> %4 = vector.broadcast %b : vector<f32> to vector<1x2x4xf32> %5 = vector.extract %4[0, 0, 1] : f32 from vector<1x2x4xf32> - // Splat scalar to 2D and extract scalar. - %6 = vector.splat %a : vector<2x3xf32> + // Splat/broadcast scalar to 2D and extract scalar. + %6 = vector.broadcast %a : f32 to vector<2x3xf32> %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32> // Broadcast scalar to 3D and extract scalar. 
@@ -3055,14 +3055,14 @@ func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %b: vector<f32>, %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32> // Extract 2D from 3D that was broadcasted from a scalar. - // CHECK: %[[extract2:.*]] = vector.broadcast %[[a]] : f32 to vector<6x7xf32> + // CHECK: %[[EXTRACT2:.*]] = vector.broadcast %[[A]] : f32 to vector<6x7xf32> %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32> // Extract 1D from 2D that was splat'ed from a scalar. - // CHECK: %[[extract3:.*]] = vector.broadcast %[[a]] : f32 to vector<3xf32> + // CHECK: %[[EXTRACT3:.*]] = vector.broadcast %[[A]] : f32 to vector<3xf32> %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32> - // CHECK: return %[[a]], %[[a]], %[[extract1]], %[[a]], %[[a]], %[[extract2]], %[[extract3]] + // CHECK: return %[[A]], %[[A]], %[[EXTRACT1]], %[[A]], %[[A]], %[[EXTRACT2]], %[[EXTRACT3]] return %1, %3, %5, %7, %9, %10, %11 : f32, f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32> } @@ -3504,7 +3504,7 @@ func.func @fold_insert_use_chain(%arg : vector<4x4xf32>, %val : f32, %pos: index %v_0 = vector.insert %val, %arg[%pos, 0] : f32 into vector<4x4xf32> %v_1 = vector.insert %val, %v_0[%pos, 0] : f32 into vector<4x4xf32> %v_2 = vector.insert %val, %v_1[%pos, 0] : f32 into vector<4x4xf32> - return %v_2 : vector<4x4xf32> + return %v_2 : vector<4x4xf32> } // ----- @@ -3518,5 +3518,5 @@ func.func @fold_insert_use_chain(%arg : vector<4x4xf32>, %val : f32, %pos: index func.func @no_fold_insert_use_chain_mismatch_static_position(%arg : vector<4xf32>, %val : f32) -> vector<4xf32> { %v_0 = vector.insert %val, %arg[0] : f32 into vector<4xf32> %v_1 = vector.insert %val, %v_0[1] : f32 into vector<4xf32> - return %v_1 : vector<4xf32> + return %v_1 : vector<4xf32> } diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-from-elements.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-from-elements.mlir index fdab2a8..f43328f 100644 --- 
a/mlir/test/Dialect/Vector/canonicalize/vector-from-elements.mlir +++ b/mlir/test/Dialect/Vector/canonicalize/vector-from-elements.mlir @@ -36,9 +36,9 @@ func.func @extract_scalar_from_from_elements(%a: f32, %b: f32) -> (f32, f32, f32 // CHECK-SAME: %[[A:.*]]: f32, %[[B:.*]]: f32) func.func @extract_1d_from_from_elements(%a: f32, %b: f32) -> (vector<3xf32>, vector<3xf32>) { %0 = vector.from_elements %a, %a, %a, %b, %b, %b : vector<2x3xf32> - // CHECK: %[[SPLAT1:.*]] = vector.splat %[[A]] : vector<3xf32> + // CHECK: %[[SPLAT1:.*]] = vector.broadcast %[[A]] : f32 to vector<3xf32> %1 = vector.extract %0[0] : vector<3xf32> from vector<2x3xf32> - // CHECK: %[[SPLAT2:.*]] = vector.splat %[[B]] : vector<3xf32> + // CHECK: %[[SPLAT2:.*]] = vector.broadcast %[[B]] : f32 to vector<3xf32> %2 = vector.extract %0[1] : vector<3xf32> from vector<2x3xf32> // CHECK: return %[[SPLAT1]], %[[SPLAT2]] return %1, %2 : vector<3xf32>, vector<3xf32> @@ -63,11 +63,11 @@ func.func @extract_2d_from_from_elements(%a: f32, %b: f32) -> (vector<2x2xf32>, // CHECK-LABEL: func @from_elements_to_splat( // CHECK-SAME: %[[A:.*]]: f32, %[[B:.*]]: f32) func.func @from_elements_to_splat(%a: f32, %b: f32) -> (vector<2x3xf32>, vector<2x3xf32>, vector<f32>) { - // CHECK: %[[SPLAT:.*]] = vector.splat %[[A]] : vector<2x3xf32> + // CHECK: %[[SPLAT:.*]] = vector.broadcast %[[A]] : f32 to vector<2x3xf32> %0 = vector.from_elements %a, %a, %a, %a, %a, %a : vector<2x3xf32> // CHECK: %[[FROM_EL:.*]] = vector.from_elements {{.*}} : vector<2x3xf32> %1 = vector.from_elements %a, %a, %a, %a, %b, %a : vector<2x3xf32> - // CHECK: %[[SPLAT2:.*]] = vector.splat %[[A]] : vector<f32> + // CHECK: %[[SPLAT2:.*]] = vector.broadcast %[[A]] : f32 to vector<f32> %2 = vector.from_elements %a : vector<f32> // CHECK: return %[[SPLAT]], %[[FROM_EL]], %[[SPLAT2]] return %0, %1, %2 : vector<2x3xf32>, vector<2x3xf32>, vector<f32> @@ -170,7 +170,7 @@ func.func @large_source_with_shape_cast_required(%arg0: vector<2x2x2x2xi8>) -> v // Could 
match, but handled by `rewriteFromElementsAsSplat`. // CHECK-LABEL: func @extract_single_elm( // CHECK-NEXT: vector.extract -// CHECK-NEXT: vector.splat +// CHECK-NEXT: vector.broadcast // CHECK-NEXT: return func.func @extract_single_elm(%arg0 : vector<2x3xi8>) -> vector<1xi8> { %0 = vector.extract %arg0[0, 0] : i8 from vector<2x3xi8> diff --git a/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir b/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir new file mode 100644 index 0000000..e4a9391 --- /dev/null +++ b/mlir/test/Dialect/Vector/canonicalize/vector-splat.mlir @@ -0,0 +1,126 @@ +// RUN: mlir-opt %s -canonicalize="test-convergence" -split-input-file -allow-unregistered-dialect | FileCheck %s + +// This file should be removed when vector.splat is removed. +// This file tests canonicalization/folding with vector.splat. +// These tests all have equivalent tests using vector.broadcast in canonicalize.mlir + + +// CHECK-LABEL: fold_extract_splat +// CHECK-SAME: %[[A:.*]]: f32 +// CHECK: return %[[A]] : f32 +func.func @fold_extract_splat(%a : f32, %idx0 : index, %idx1 : index, %idx2 : index) -> f32 { + %b = vector.splat %a : vector<1x2x4xf32> + %r = vector.extract %b[%idx0, %idx1, %idx2] : f32 from vector<1x2x4xf32> + return %r : f32 +} + +// ----- + +// CHECK-LABEL: extract_strided_splat +// CHECK: %[[B:.*]] = vector.broadcast %{{.*}} f16 to vector<2x4xf16> +// CHECK-NEXT: return %[[B]] : vector<2x4xf16> +func.func @extract_strided_splat(%arg0: f16) -> vector<2x4xf16> { + %0 = vector.splat %arg0 : vector<16x4xf16> + %1 = vector.extract_strided_slice %0 + {offsets = [1, 0], sizes = [2, 4], strides = [1, 1]} : + vector<16x4xf16> to vector<2x4xf16> + return %1 : vector<2x4xf16> +} + +// ----- + +// CHECK-LABEL: func @splat_fold +// CHECK-NEXT: [[V:%.*]] = arith.constant dense<1.000000e+00> : vector<4xf32> +// CHECK-NEXT: return [[V]] : vector<4xf32> +func.func @splat_fold() -> vector<4xf32> { + %c = arith.constant 1.0 : f32 + %v = vector.splat %c : 
vector<4xf32> + return %v : vector<4xf32> + +} + +// ----- + +// CHECK-LABEL: func @transpose_splat2( +// CHECK-SAME: %[[VAL_0:.*]]: f32) -> vector<3x4xf32> { +// CHECK: %[[VAL_1:.*]] = vector.broadcast %[[VAL_0]] : f32 to vector<3x4xf32> +// CHECK: return %[[VAL_1]] : vector<3x4xf32> +func.func @transpose_splat2(%arg : f32) -> vector<3x4xf32> { + %splat = vector.splat %arg : vector<4x3xf32> + %0 = vector.transpose %splat, [1, 0] : vector<4x3xf32> to vector<3x4xf32> + return %0 : vector<3x4xf32> +} + +// ----- + +// CHECK-LABEL: @insert_strided_slice_splat +// CHECK-SAME: (%[[ARG:.*]]: f32) +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : f32 to vector<8x16xf32> +// CHECK-NEXT: return %[[SPLAT]] : vector<8x16xf32> +func.func @insert_strided_slice_splat(%x: f32) -> (vector<8x16xf32>) { + %splat0 = vector.splat %x : vector<4x4xf32> + %splat1 = vector.splat %x : vector<8x16xf32> + %0 = vector.insert_strided_slice %splat0, %splat1 {offsets = [2, 2], strides = [1, 1]} + : vector<4x4xf32> into vector<8x16xf32> + return %0 : vector<8x16xf32> +} + +// ----- + +// CHECK-LABEL: func @shuffle_splat +// CHECK-SAME: (%[[ARG:.*]]: i32) +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<4xi32> +// CHECK-NEXT: return %[[SPLAT]] : vector<4xi32> +func.func @shuffle_splat(%x : i32) -> vector<4xi32> { + %v0 = vector.splat %x : vector<4xi32> + %v1 = vector.splat %x : vector<2xi32> + %shuffle = vector.shuffle %v0, %v1 [2, 3, 4, 5] : vector<4xi32>, vector<2xi32> + return %shuffle : vector<4xi32> +} + + +// ----- + +// CHECK-LABEL: func @insert_splat +// CHECK-SAME: (%[[ARG:.*]]: i32) +// CHECK-NEXT: %[[SPLAT:.*]] = vector.broadcast %[[ARG]] : i32 to vector<2x4x3xi32> +// CHECK-NEXT: return %[[SPLAT]] : vector<2x4x3xi32> +func.func @insert_splat(%x : i32) -> vector<2x4x3xi32> { + %v0 = vector.splat %x : vector<4x3xi32> + %v1 = vector.splat %x : vector<2x4x3xi32> + %insert = vector.insert %v0, %v1[0] : vector<4x3xi32> into vector<2x4x3xi32> + return 
%insert : vector<2x4x3xi32> +} + +// ----- + +// CHECK-LABEL: func @extract_from_0d_splat_broadcast_regression +// CHECK-SAME: (%[[A:.*]]: f32, %[[C:.*]]: vector<2xf32>) +func.func @extract_from_0d_splat_broadcast_regression(%a: f32, %c: vector<2xf32>) -> (f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32>) { + // Splat scalar to 0D and extract scalar. + %0 = vector.splat %a : vector<f32> + %1 = vector.extract %0[] : f32 from vector<f32> + + // Broadcast scalar to 0D and extract scalar. + %2 = vector.splat %a : vector<f32> + %3 = vector.extract %2[] : f32 from vector<f32> + + // Splat scalar to 2D and extract scalar. + %6 = vector.splat %a : vector<2x3xf32> + %7 = vector.extract %6[0, 1] : f32 from vector<2x3xf32> + + // Broadcast scalar to 3D and extract scalar. + %8 = vector.splat %a : vector<5x6x7xf32> + %9 = vector.extract %8[2, 1, 5] : f32 from vector<5x6x7xf32> + + // Extract 2D from 3D that was broadcasted from a scalar. + // CHECK: %[[EXTRACT2:.*]] = vector.broadcast %[[A]] : f32 to vector<6x7xf32> + %10 = vector.extract %8[2] : vector<6x7xf32> from vector<5x6x7xf32> + + // Extract 1D from 2D that was splat'ed from a scalar. 
+ // CHECK: %[[EXTRACT3:.*]] = vector.broadcast %[[A]] : f32 to vector<3xf32> + %11 = vector.extract %6[1] : vector<3xf32> from vector<2x3xf32> + + // CHECK: return %[[A]], %[[A]], %[[A]], %[[A]], %[[EXTRACT2]], %[[EXTRACT3]] + return %1, %3, %7, %9, %10, %11 : f32, f32, f32, f32, vector<6x7xf32>, vector<3xf32> +} diff --git a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir index 511ab70..1b54d54 100644 --- a/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-to-vector-load-store.mlir @@ -284,19 +284,19 @@ func.func @transfer_read_permutations(%mem_0 : memref<?x?xf32>, %mem_1 : memref< %cst = arith.constant 0.000000e+00 : f32 %c0 = arith.constant 0 : index -// CHECK: %[[MASK0:.*]] = vector.splat %{{.*}} : vector<14x7xi1> +// CHECK: %[[MASK0:.*]] = vector.broadcast %{{.*}} : i1 to vector<14x7xi1> %mask0 = vector.splat %m : vector<14x7xi1> %0 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask0 {in_bounds = [true, false, true, true], permutation_map = #map0} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32> // CHECK: vector.transfer_read {{.*}} %[[MASK0]] {in_bounds = [false, true, true, true], permutation_map = #[[$MAP0]]} : memref<?x?x?x?xf32>, vector<14x7x8x16xf32> // CHECK: vector.transpose %{{.*}}, [1, 0, 2, 3] : vector<14x7x8x16xf32> to vector<7x14x8x16xf32> -// CHECK: %[[MASK1:.*]] = vector.splat %{{.*}} : vector<16x14xi1> +// CHECK: %[[MASK1:.*]] = vector.broadcast %{{.*}} : i1 to vector<16x14xi1> %mask1 = vector.splat %m : vector<16x14xi1> %1 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask1 {in_bounds = [true, false, true, false], permutation_map = #map1} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32> // CHECK: vector.transfer_read {{.*}} %[[MASK1]] {in_bounds = [false, false, true, true], permutation_map = #[[$MAP0]]} : memref<?x?x?x?xf32>, vector<16x14x7x8xf32> // CHECK: vector.transpose 
%{{.*}}, [2, 1, 3, 0] : vector<16x14x7x8xf32> to vector<7x14x8x16xf32> -// CHECK: %[[MASK3:.*]] = vector.splat %{{.*}} : vector<14x7xi1> +// CHECK: %[[MASK3:.*]] = vector.broadcast %{{.*}} : i1 to vector<14x7xi1> %mask2 = vector.splat %m : vector<14x7xi1> %2 = vector.transfer_read %mem_1[%c0, %c0, %c0, %c0], %cst, %mask2 {in_bounds = [true, false, true, true], permutation_map = #map2} : memref<?x?x?x?xf32>, vector<7x14x8x16xf32> // CHECK: vector.transfer_read {{.*}} %[[MASK3]] {in_bounds = [false, true, true], permutation_map = #[[$MAP1]]} : memref<?x?x?x?xf32>, vector<14x16x7xf32> @@ -336,7 +336,7 @@ func.func @transfer_write_permutations_tensor_masked( // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index %c0 = arith.constant 0 : index - // CHECK: %[[MASK:.*]] = vector.splat %[[M]] : vector<16x14x7x8xi1> + // CHECK: %[[MASK:.*]] = vector.broadcast %[[M]] : i1 to vector<16x14x7x8xi1> %mask0 = vector.splat %m : vector<16x14x7x8xi1> %res = vector.transfer_write %vec, %dst[%c0, %c0, %c0, %c0], %mask0 {in_bounds = [true, false, false, true], permutation_map = affine_map<(d0, d1, d2, d3) -> (d2, d1, d3, d0)>} : vector<7x14x8x16xf32>, tensor<?x?x?x?xf32> // CHECK: %[[NEW_VEC0:.*]] = vector.transpose %{{.*}} [3, 1, 0, 2] : vector<7x14x8x16xf32> to vector<16x14x7x8xf32> diff --git a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir index 8c4f0aa..85478cc 100644 --- a/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir-invalid.mlir @@ -312,3 +312,42 @@ llvm.func @nvvm_prefetch_uniform_with_invalid_addr_space(%global_ptr: !llvm.ptr< nvvm.prefetch level = L1 uniform, %global_ptr : !llvm.ptr<1> llvm.return } + +// ----- + +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // expected-error@+1 {{'nvvm.stmatrix' op expected num attribute to be 1, 2 or 4}} + nvvm.stmatrix %arg0, %r1, %r2, %r3 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, 
eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32, i32, i32 + llvm.return +} + +// ----- + +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // expected-error@+1 {{'nvvm.stmatrix' op expected shape to be 8x8 or 16x8}} + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 16>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32 + llvm.return +} + +// ----- + +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B16 for 8x8 matrix}} + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b8>} : !llvm.ptr<3>, i32 + llvm.return +} +// ----- + +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // expected-error@+1 {{'nvvm.stmatrix' op expected element type to be B8 for 16x8 matrix}} + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32 + llvm.return +} + +// ----- + +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // expected-error@+1 {{'nvvm.stmatrix' op expected layout to be col for 16x8 matrix}} + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b8>} : !llvm.ptr<3>, i32 + llvm.return +} diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir index f86a041..5c2cfa4 100644 --- a/mlir/test/Target/LLVMIR/nvvmir.mlir +++ b/mlir/test/Target/LLVMIR/nvvmir.mlir @@ -573,6 +573,29 @@ llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { llvm.return } +// CHECK-LABEL: @st_matrix +llvm.func @st_matrix(%arg0: !llvm.ptr<3>, %r1: i32, %r2: i32, %r3: i32, %r4: i32) { + // 
CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x1.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x1.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x1.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b8>} : !llvm.ptr<3>, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x2.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout<row>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x2.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x2.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b8>} : !llvm.ptr<3>, i32, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x4.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout<row>, 
shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32, i32, i32, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m8n8.x4.trans.b16.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 8, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b16>} : !llvm.ptr<3>, i32, i32, i32, i32 + // CHECK: call void @llvm.nvvm.stmatrix.sync.aligned.m16n8.x4.trans.b8.p3(ptr addrspace(3) %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}) + nvvm.stmatrix %arg0, %r1, %r2, %r3, %r4 {layout = #nvvm.mma_layout<col>, shape = #nvvm.ld_st_matrix_shape<m = 16, n = 8>, eltType = #nvvm.ld_st_matrix_elt_type<b8>} : !llvm.ptr<3>, i32, i32, i32, i32 + llvm.return +} + // This function has the "kernel" attribute attached and should appear in the // NVVM annotations after conversion. llvm.func @kernel_func() attributes {nvvm.kernel} { diff --git a/mlir/test/Target/SPIRV/constant.mlir b/mlir/test/Target/SPIRV/constant.mlir index 1695d2a..c81ceac 100644 --- a/mlir/test/Target/SPIRV/constant.mlir +++ b/mlir/test/Target/SPIRV/constant.mlir @@ -335,6 +335,20 @@ spirv.module Logical Vulkan requires #spirv.vce<v1.3, [VulkanMemoryModel, Shader spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xf32> } + // CHECK-LABEL: @null_arm_tensor_of_i32 + spirv.func @null_arm_tensor_of_i32() -> (!spirv.arm.tensor<2x3xi32>) "None" { + // CHECK: spirv.Constant dense<0> : !spirv.arm.tensor<2x3xi32> + %0 = spirv.Constant dense<0> : !spirv.arm.tensor<2x3xi32> + spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xi32> + } + + // CHECK-LABEL: @null_arm_tensor_of_f32 + spirv.func @null_arm_tensor_of_f32() -> (!spirv.arm.tensor<2x3xf32>) "None" { + // CHECK: spirv.Constant dense<0.000000e+00> : !spirv.arm.tensor<2x3xf32> + %0 = spirv.Constant dense<0.0> : !spirv.arm.tensor<2x3xf32> + spirv.ReturnValue %0 : 
!spirv.arm.tensor<2x3xf32> + } + spirv.EntryPoint "GLCompute" @bool_const } @@ -391,6 +405,20 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, ReplicatedCompos spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xi32> } + // CHECK-LABEL: @splat_array_of_non_splat_array_of_arrays_of_i32 + spirv.func @splat_array_of_non_splat_array_of_arrays_of_i32() -> !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x i32>>> "None" { + // CHECK: spirv.EXT.ConstantCompositeReplicate {{\[}}{{\[}}[1 : i32, 2 : i32, 3 : i32], [4 : i32, 5 : i32, 6 : i32]]] : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x i32>>> + %0 = spirv.EXT.ConstantCompositeReplicate [[[1 : i32, 2 : i32, 3 : i32], [4 : i32, 5 : i32, 6 : i32]]] : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x i32>>> + spirv.ReturnValue %0 : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x i32>>> + } + + // CHECK-LABEL: @null_cc_arm_tensor_of_i32 + spirv.func @null_cc_arm_tensor_of_i32() -> (!spirv.arm.tensor<2x3xi32>) "None" { + // CHECK: spirv.Constant dense<0> : !spirv.arm.tensor<2x3xi32> + %0 = spirv.EXT.ConstantCompositeReplicate [0 : i32] : !spirv.arm.tensor<2x3xi32> + spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xi32> + } + // CHECK-LABEL: @splat_vector_f32 spirv.func @splat_vector_f32() -> (vector<3xf32>) "None" { // CHECK: spirv.EXT.ConstantCompositeReplicate [1.000000e+00 : f32] : vector<3xf32> @@ -439,4 +467,18 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Shader, ReplicatedCompos %0 = spirv.EXT.ConstantCompositeReplicate [2.0 : f32] : !spirv.arm.tensor<2x3xf32> spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xf32> } + + // CHECK-LABEL: @splat_array_of_non_splat_array_of_arrays_of_f32 + spirv.func @splat_array_of_non_splat_array_of_arrays_of_f32() -> !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x f32>>> "None" { + // CHECK: spirv.EXT.ConstantCompositeReplicate {{\[}}{{\[}}[1.000000e+00 : f32, 2.000000e+00 : f32, 3.000000e+00 : f32], [4.000000e+00 : f32, 5.000000e+00 : f32, 6.000000e+00 : 
f32]]] : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x f32>>> + %0 = spirv.EXT.ConstantCompositeReplicate [[[1.0 : f32, 2.0 : f32, 3.0 : f32], [4.0 : f32, 5.0 : f32, 6.0 : f32]]] : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x f32>>> + spirv.ReturnValue %0 : !spirv.array<2 x !spirv.array<2 x !spirv.array<3 x f32>>> + } + + // CHECK-LABEL: @null_cc_arm_tensor_of_f32 + spirv.func @null_cc_arm_tensor_of_f32() -> (!spirv.arm.tensor<2x3xf32>) "None" { + // CHECK: spirv.Constant dense<0.000000e+00> : !spirv.arm.tensor<2x3xf32> + %0 = spirv.EXT.ConstantCompositeReplicate [0.0 : f32] : !spirv.arm.tensor<2x3xf32> + spirv.ReturnValue %0 : !spirv.arm.tensor<2x3xf32> + } } diff --git a/mlir/test/Target/SPIRV/intel-ext-ops.mlir b/mlir/test/Target/SPIRV/intel-ext-ops.mlir index 6d2fd32..53cf8bf 100644 --- a/mlir/test/Target/SPIRV/intel-ext-ops.mlir +++ b/mlir/test/Target/SPIRV/intel-ext-ops.mlir @@ -33,6 +33,28 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Bfloat16ConversionINTEL] // ----- //===----------------------------------------------------------------------===// +// spirv.INTEL.RoundFToTF32 +//===----------------------------------------------------------------------===// + +spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [TensorFloat32RoundingINTEL], [SPV_INTEL_tensor_float32_conversion]> { + // CHECK-LABEL: @f32_to_tf32 + spirv.func @f32_to_tf32(%arg0 : f32) "None" { + // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : f32 to f32 + %1 = spirv.INTEL.RoundFToTF32 %arg0 : f32 to f32 + spirv.Return + } + + // CHECK-LABEL: @f32_to_tf32_vec + spirv.func @f32_to_tf32_vec(%arg0 : vector<2xf32>) "None" { + // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : vector<2xf32> to vector<2xf32> + %1 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<2xf32> + spirv.Return + } +} + +// ----- + +//===----------------------------------------------------------------------===// // spirv.INTEL.SplitBarrier 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/mlir-translate/emitc_classops.mlir b/mlir/test/mlir-translate/emitc_classops.mlir index 4b7ddf4..d880f9b 100644 --- a/mlir/test/mlir-translate/emitc_classops.mlir +++ b/mlir/test/mlir-translate/emitc_classops.mlir @@ -14,15 +14,12 @@ emitc.class @modelClass { // CHECK-LABEL: class modelClass { // CHECK-NEXT: public: -// CHECK-NEXT: float[1] fieldName0; -// CHECK-NEXT: float[1] fieldName1; +// CHECK-NEXT: float fieldName0[1]; +// CHECK-NEXT: float fieldName1[1]; // CHECK-NEXT: void execute() { // CHECK-NEXT: size_t v1 = 0; -// CHECK-NEXT: float[1] v2 = fieldName0; -// CHECK-NEXT: float[1] v3 = fieldName1; // CHECK-NEXT: return; // CHECK-NEXT: } -// CHECK-EMPTY: // CHECK-NEXT: }; emitc.class final @finalClass { @@ -39,13 +36,43 @@ emitc.class final @finalClass { // CHECK-LABEL: class finalClass final { // CHECK-NEXT: public: -// CHECK-NEXT: float[1] fieldName0; -// CHECK-NEXT: float[1] fieldName1; +// CHECK-NEXT: float fieldName0[1]; +// CHECK-NEXT: float fieldName1[1]; // CHECK-NEXT: void execute() { // CHECK-NEXT: size_t v1 = 0; -// CHECK-NEXT: float[1] v2 = fieldName0; -// CHECK-NEXT: float[1] v3 = fieldName1; // CHECK-NEXT: return; // CHECK-NEXT: } -// CHECK-EMPTY: // CHECK-NEXT: }; + +emitc.class @mainClass { + emitc.field @fieldName0 : !emitc.array<2xf32> = dense<0.0> {attrs = {emitc.name_hint = "another_feature"}} + emitc.func @get_fieldName0() { + %0 = emitc.get_field @fieldName0 : !emitc.array<2xf32> + return + } +} + +// CHECK-LABEL: class mainClass { +// CHECK-NEXT: public: +// CHECK-NEXT: float fieldName0[2] = {0.0e+00f, 0.0e+00f}; +// CHECK-NEXT: void get_fieldName0() { +// CHECK-NEXT: return; +// CHECK-NEXT: } +// CHECK-NEXT: }; + +emitc.class @reflectionClass { + emitc.field @reflectionMap : !emitc.opaque<"const std::map<std::string, std::string>"> = #emitc.opaque<"{ { \22another_feature\22, \22fieldName0\22 } }"> + emitc.func @get_reflectionMap() { 
+ %0 = emitc.get_field @reflectionMap : !emitc.opaque<"const std::map<std::string, std::string>"> + return + } +} + +// CHECK-LABEL: class reflectionClass { +// CHECK-NEXT: public: +// CHECK-NEXT: const std::map<std::string, std::string> reflectionMap = { { "another_feature", "fieldName0" } }; +// CHECK-NEXT: void get_reflectionMap() { +// CHECK-NEXT: return; +// CHECK-NEXT: } +// CHECK-NEXT: }; + diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index a1d5afd..aa635ac 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2285,6 +2285,37 @@ libc_support_library( ) libc_support_library( + name = "__support_math_atan2f", + hdrs = ["src/__support/math/atan2f.h"], + deps = [ + ":__support_fputil_fenv_impl", + ":__support_fputil_fp_bits", + ":__support_fputil_polyeval", + ":__support_fputil_double_double", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_macros_config", + ":__support_macros_optimization", + ":__support_math_inv_trigf_utils", + ], +) + +libc_support_library( + name = "__support_math_atan2f128", + hdrs = ["src/__support/math/atan2f128.h"], + deps = [ + ":__support_math_atan_utils", + ":__support_fputil_fp_bits", + ":__support_fputil_dyadic_float", + ":__support_fputil_nearest_integer", + ":__support_integer_literals", + ":__support_macros_config", + ":__support_macros_optimization", + ":__support_uint128", + ], +) + +libc_support_library( name = "__support_math_atanf", hdrs = ["src/__support/math/atanf.h"], deps = [ @@ -2343,6 +2374,18 @@ libc_support_library( ) libc_support_library( + name = "__support_math_atanhf", + hdrs = ["src/__support/math/atanhf.h"], + deps = [ + ":__support_math_acoshf_utils", + ":__support_fputil_fenv_impl", + ":__support_fputil_fp_bits", + ":__support_macros_config", + ":__support_macros_optimization", + ], +) + +libc_support_library( name = 
"__support_math_erff", hdrs = ["src/__support/math/erff.h"], deps = [ @@ -2946,9 +2989,14 @@ libc_math_function( libc_math_function( name = "atan2f", additional_deps = [ - ":__support_fputil_double_double", - ":__support_fputil_nearest_integer", - ":__support_math_inv_trigf_utils", + ":__support_math_atan2f", + ], +) + +libc_math_function( + name = "atan2f128", + additional_deps = [ + ":__support_math_atan2f128", ], ) @@ -2962,13 +3010,7 @@ libc_math_function( libc_math_function( name = "atanhf", additional_deps = [ - ":__support_fputil_fma", - ":__support_fputil_multiply_add", - ":__support_fputil_nearest_integer", - ":__support_fputil_polyeval", - ":__support_macros_optimization", - ":common_constants", - ":explogxf", + ":__support_math_atanhf", ], ) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index 96cd788..e96fc03 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -152,6 +152,7 @@ cc_library( "//clang:lex", "//clang:sema", "//lldb:CoreHeaders", + "//lldb:ExpressionHeaders", "//lldb:Host", "//lldb:SymbolHeaders", "//lldb:TargetHeaders", |