diff options
author | Thurston Dang <thurston@google.com> | 2024-12-18 15:53:35 -0800 |
---|---|---|
committer | Vitaly Buka <vitalybuka@google.com> | 2024-12-18 15:53:35 -0800 |
commit | 014a3c31ce7143093d8d1aafc6866e0c8a05a44b (patch) | |
tree | 09ed7378c44e3692dbe6b7ed283fdac33a16e14b | |
parent | 97ff599fc6949dc2bac3af7eb3200ebd35990c5a (diff) | |
parent | 2691b964150c77a9e6967423383ad14a7693095e (diff) | |
download | llvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.zip llvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.tar.gz llvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.tar.bz2 |
[𝘀𝗽𝗿] changes introduced through rebaseusers/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable
Created using spr 1.3.4
[skip ci]
162 files changed, 6993 insertions, 1870 deletions
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 29858f0..122118b 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2186,7 +2186,11 @@ enum CXCursorKind { */ CXCursor_OpenACCHostDataConstruct = 326, - CXCursor_LastStmt = CXCursor_OpenACCHostDataConstruct, + /** OpenACC wait Construct. + */ + CXCursor_OpenACCWaitConstruct = 327, + + CXCursor_LastStmt = CXCursor_OpenACCWaitConstruct, /** * Cursor that represents the translation unit itself. diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index 3d63d58..f5652b2 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -159,7 +159,7 @@ public: // Some statements have custom mechanisms for dumping their children. if (isa<DeclStmt>(S) || isa<GenericSelectionExpr>(S) || - isa<RequiresExpr>(S)) + isa<RequiresExpr>(S) || isa<OpenACCWaitConstruct>(S)) return; if (Traversal == TK_IgnoreUnlessSpelledInSource && @@ -825,6 +825,16 @@ public: Visit(C); } + void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *Node) { + // Needs custom child checking to put clauses AFTER the children, which are + // the expressions in the 'wait' construct. Others likely need this as well, + // and might need to do the associated statement after it. + for (const Stmt *S : Node->children()) + Visit(S); + for (const auto *C : Node->clauses()) + Visit(C); + } + void VisitInitListExpr(const InitListExpr *ILE) { if (auto *Filler = ILE->getArrayFiller()) { Visit(Filler, "array_filler"); diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 5d5c91f..d9a87b3 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -4063,10 +4063,19 @@ DEF_TRAVERSE_STMT(OpenACCCombinedConstruct, { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); }) DEF_TRAVERSE_STMT(OpenACCDataConstruct, { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); }) -DEF_TRAVERSE_STMT(OpenACCEnterDataConstruct, {}) -DEF_TRAVERSE_STMT(OpenACCExitDataConstruct, {}) +DEF_TRAVERSE_STMT(OpenACCEnterDataConstruct, + { TRY_TO(VisitOpenACCClauseList(S->clauses())); }) +DEF_TRAVERSE_STMT(OpenACCExitDataConstruct, + { TRY_TO(VisitOpenACCClauseList(S->clauses())); }) DEF_TRAVERSE_STMT(OpenACCHostDataConstruct, { TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); }) +DEF_TRAVERSE_STMT(OpenACCWaitConstruct, { + if (S->hasDevNumExpr()) + TRY_TO(TraverseStmt(S->getDevNumExpr())); + for (auto *E : S->getQueueIdExprs()) + TRY_TO(TraverseStmt(E)); + TRY_TO(VisitOpenACCClauseList(S->clauses())); +}) // Traverse HLSL: Out argument expression DEF_TRAVERSE_STMT(HLSLOutArgExpr, {}) diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h index a1903cd..093393a 100644 --- a/clang/include/clang/AST/StmtOpenACC.h +++ b/clang/include/clang/AST/StmtOpenACC.h @@ -469,5 +469,128 @@ public: return const_cast<OpenACCHostDataConstruct *>(this)->getStructuredBlock(); } }; + +// This class represents a 'wait' construct, which has some expressions plus a +// clause list. +class OpenACCWaitConstruct final + : public OpenACCConstructStmt, + private llvm::TrailingObjects<OpenACCWaitConstruct, Expr *, + OpenACCClause *> { + // FIXME: We should be storing a `const OpenACCClause *` to be consistent with + // the rest of the constructs, but TrailingObjects doesn't allow for mixing + // constness in its implementation of `getTrailingObjects`. + + friend TrailingObjects; + friend class ASTStmtWriter; + friend class ASTStmtReader; + // Locations of the left and right parens of the 'wait-argument' + // expression-list. + SourceLocation LParenLoc, RParenLoc; + // Location of the 'queues' keyword, if present. + SourceLocation QueuesLoc; + + // Number of the expressions being represented. Index '0' is always the + // 'devnum' expression, even if it not present. + unsigned NumExprs = 0; + + OpenACCWaitConstruct(unsigned NumExprs, unsigned NumClauses) + : OpenACCConstructStmt(OpenACCWaitConstructClass, + OpenACCDirectiveKind::Wait, SourceLocation{}, + SourceLocation{}, SourceLocation{}), + NumExprs(NumExprs) { + assert(NumExprs >= 1 && + "NumExprs should always be >= 1 because the 'devnum' " + "expr is represented by a null if necessary"); + std::uninitialized_value_construct(getExprPtr(), + getExprPtr() + NumExprs); + std::uninitialized_value_construct(getTrailingObjects<OpenACCClause *>(), + getTrailingObjects<OpenACCClause *>() + + NumClauses); + setClauseList(MutableArrayRef(const_cast<const OpenACCClause **>( + getTrailingObjects<OpenACCClause *>()), + NumClauses)); + } + + OpenACCWaitConstruct(SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, Expr *DevNumExpr, + SourceLocation QueuesLoc, ArrayRef<Expr *> QueueIdExprs, + SourceLocation RParenLoc, SourceLocation End, + ArrayRef<const OpenACCClause *> Clauses) + : OpenACCConstructStmt(OpenACCWaitConstructClass, + OpenACCDirectiveKind::Wait, Start, DirectiveLoc, + End), + LParenLoc(LParenLoc), RParenLoc(RParenLoc), QueuesLoc(QueuesLoc), + NumExprs(QueueIdExprs.size() + 1) { + assert(NumExprs >= 1 && + "NumExprs should always be >= 1 because the 'devnum' " + "expr is represented by a null if necessary"); + + std::uninitialized_copy(&DevNumExpr, &DevNumExpr + 1, + getExprPtr()); + std::uninitialized_copy(QueueIdExprs.begin(), QueueIdExprs.end(), + getExprPtr() + 1); + + std::uninitialized_copy(const_cast<OpenACCClause **>(Clauses.begin()), + const_cast<OpenACCClause **>(Clauses.end()), + getTrailingObjects<OpenACCClause *>()); + setClauseList(MutableArrayRef(const_cast<const OpenACCClause **>( + getTrailingObjects<OpenACCClause *>()), + Clauses.size())); + } + + size_t numTrailingObjects(OverloadToken<Expr *>) const { return NumExprs; } + size_t numTrailingObjects(OverloadToken<const OpenACCClause *>) const { + return clauses().size(); + } + + Expr **getExprPtr() const { + return const_cast<Expr**>(getTrailingObjects<Expr *>()); + } + + llvm::ArrayRef<Expr *> getExprs() const { + return llvm::ArrayRef<Expr *>(getExprPtr(), NumExprs); + } + + llvm::ArrayRef<Expr *> getExprs() { + return llvm::ArrayRef<Expr *>(getExprPtr(), NumExprs); + } + +public: + static bool classof(const Stmt *T) { + return T->getStmtClass() == OpenACCWaitConstructClass; + } + + static OpenACCWaitConstruct * + CreateEmpty(const ASTContext &C, unsigned NumExprs, unsigned NumClauses); + + static OpenACCWaitConstruct * + Create(const ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, Expr *DevNumExpr, SourceLocation QueuesLoc, + ArrayRef<Expr *> QueueIdExprs, SourceLocation RParenLoc, + SourceLocation End, ArrayRef<const OpenACCClause *> Clauses); + + SourceLocation getLParenLoc() const { return LParenLoc; } + SourceLocation getRParenLoc() const { return RParenLoc; } + bool hasQueuesTag() const { return !QueuesLoc.isInvalid(); } + SourceLocation getQueuesLoc() const { return QueuesLoc; } + + bool hasDevNumExpr() const { return getExprs()[0]; } + Expr *getDevNumExpr() const { return getExprs()[0]; } + llvm::ArrayRef<Expr *> getQueueIdExprs() { return getExprs().drop_front(); } + llvm::ArrayRef<Expr *> getQueueIdExprs() const { + return getExprs().drop_front(); + } + + child_range children() { + Stmt **Begin = reinterpret_cast<Stmt **>(getExprPtr()); + return child_range(Begin, Begin + NumExprs); + } + + const_child_range children() const { + Stmt *const *Begin = + reinterpret_cast<Stmt *const *>(getExprPtr()); + return const_child_range(Begin, Begin + NumExprs); + } +}; } // namespace clang #endif // LLVM_CLANG_AST_STMTOPENACC_H diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h index e54e7e5..b6f16be 100644 --- a/clang/include/clang/AST/TextNodeDumper.h +++ b/clang/include/clang/AST/TextNodeDumper.h @@ -415,6 +415,7 @@ public: void VisitOpenACCEnterDataConstruct(const OpenACCEnterDataConstruct *S); void VisitOpenACCExitDataConstruct(const OpenACCExitDataConstruct *S); void VisitOpenACCHostDataConstruct(const OpenACCHostDataConstruct *S); + void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S); void VisitOpenACCAsteriskSizeExpr(const OpenACCAsteriskSizeExpr *S); void VisitEmbedExpr(const EmbedExpr *S); void VisitAtomicExpr(const AtomicExpr *AE); diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 0c3c580..6c7314b 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -312,6 +312,7 @@ def OpenACCDataConstruct : StmtNode<OpenACCAssociatedStmtConstruct>; def OpenACCEnterDataConstruct : StmtNode<OpenACCConstructStmt>; def OpenACCExitDataConstruct : StmtNode<OpenACCConstructStmt>; def OpenACCHostDataConstruct : StmtNode<OpenACCAssociatedStmtConstruct>; +def OpenACCWaitConstruct : StmtNode<OpenACCConstructStmt>; // OpenACC Additional Expressions. def OpenACCAsteriskSizeExpr : StmtNode<Expr>; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index d3838a4..e99d2cf 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3706,10 +3706,14 @@ private: OpenACCDirectiveKind DirKind; SourceLocation StartLoc; SourceLocation DirLoc; + SourceLocation LParenLoc; + SourceLocation RParenLoc; SourceLocation EndLoc; + SourceLocation MiscLoc; + SmallVector<Expr *> Exprs; SmallVector<OpenACCClause *> Clauses; - // TODO OpenACC: As we implement support for the Atomic, Routine, Cache, and - // Wait constructs, we likely want to put that information in here as well. + // TODO OpenACC: As we implement support for the Atomic, Routine, and Cache + // constructs, we likely want to put that information in here as well. }; struct OpenACCWaitParseInfo { @@ -3717,6 +3721,13 @@ private: Expr *DevNumExpr = nullptr; SourceLocation QueuesLoc; SmallVector<Expr *> QueueIdExprs; + + SmallVector<Expr *> getAllExprs() { + SmallVector<Expr *> Out; + Out.push_back(DevNumExpr); + Out.insert(Out.end(), QueueIdExprs.begin(), QueueIdExprs.end()); + return Out; + } }; /// Represents the 'error' state of parsing an OpenACC Clause, and stores diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h index 4b92af5..04ab1ac 100644 --- a/clang/include/clang/Sema/SemaOpenACC.h +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -679,12 +679,18 @@ public: /// Called after the directive has been completely parsed, including the /// declaration group or associated statement. - StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc, - SourceLocation DirLoc, - SourceLocation EndLoc, - ArrayRef<OpenACCClause *> Clauses, - StmtResult AssocStmt); + /// LParenLoc: Location of the left paren, if it exists (not on all + /// constructs). + /// MiscLoc: First misc location, if necessary (not all constructs). + /// Exprs: List of expressions on the construct itself, if necessary (not all + /// constructs). + /// RParenLoc: Location of the right paren, if it exists (not on all + /// constructs). + StmtResult ActOnEndStmtDirective( + OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation LParenLoc, SourceLocation MiscLoc, ArrayRef<Expr *> Exprs, + SourceLocation RParenLoc, SourceLocation EndLoc, + ArrayRef<OpenACCClause *> Clauses, StmtResult AssocStmt); /// Called after the directive has been completely parsed, including the /// declaration group or associated statement. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index da0e5fd..57e27c3 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -2021,6 +2021,7 @@ enum StmtCode { STMT_OPENACC_ENTER_DATA_CONSTRUCT, STMT_OPENACC_EXIT_DATA_CONSTRUCT, STMT_OPENACC_HOST_DATA_CONSTRUCT, + STMT_OPENACC_WAIT_CONSTRUCT, // HLSL Constructs EXPR_HLSL_OUT_ARG, diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp index fa06dff..646eabd 100644 --- a/clang/lib/APINotes/APINotesReader.cpp +++ b/clang/lib/APINotes/APINotesReader.cpp @@ -2045,7 +2045,12 @@ APINotesReader::VersionedInfo<T>::VersionedInfo( Results.begin(), Results.end(), [](const std::pair<llvm::VersionTuple, T> &left, const std::pair<llvm::VersionTuple, T> &right) -> bool { - assert(left.first != right.first && "two entries for the same version"); + // The comparison function should be reflective, and with expensive + // checks we can get callbacks basically checking that lambda(a,a) is + // false. We could still check that we do not find equal elements when + // left!=right. + assert((&left == &right || left.first != right.first) && + "two entries for the same version"); return left.first < right.first; })); diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp index fb73dfb..6d9f267 100644 --- a/clang/lib/AST/StmtOpenACC.cpp +++ b/clang/lib/AST/StmtOpenACC.cpp @@ -196,3 +196,32 @@ OpenACCHostDataConstruct *OpenACCHostDataConstruct::Create( Clauses, StructuredBlock); return Inst; } + +OpenACCWaitConstruct *OpenACCWaitConstruct::CreateEmpty(const ASTContext &C, + unsigned NumExprs, + unsigned NumClauses) { + void *Mem = C.Allocate( + OpenACCWaitConstruct::totalSizeToAlloc<Expr *, OpenACCClause *>( + NumExprs, NumClauses)); + + auto *Inst = new (Mem) OpenACCWaitConstruct(NumExprs, NumClauses); + return Inst; +} + +OpenACCWaitConstruct *OpenACCWaitConstruct::Create( + const ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc, + SourceLocation LParenLoc, Expr *DevNumExpr, SourceLocation QueuesLoc, + ArrayRef<Expr *> QueueIdExprs, SourceLocation RParenLoc, SourceLocation End, + ArrayRef<const OpenACCClause *> Clauses) { + + assert(llvm::all_of(QueueIdExprs, [](Expr *E) { return E != nullptr; })); + + void *Mem = C.Allocate( + OpenACCWaitConstruct::totalSizeToAlloc<Expr *, OpenACCClause *>( + QueueIdExprs.size() + 1, Clauses.size())); + + auto *Inst = new (Mem) + OpenACCWaitConstruct(Start, DirectiveLoc, LParenLoc, DevNumExpr, + QueuesLoc, QueueIdExprs, RParenLoc, End, Clauses); + return Inst; +} diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index 488419a..ecc9b6e3 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -1238,6 +1238,34 @@ void StmtPrinter::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) { PrintStmt(S->getStructuredBlock()); } +void StmtPrinter::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) { + Indent() << "#pragma acc wait"; + if (!S->getLParenLoc().isInvalid()) { + OS << "("; + if (S->hasDevNumExpr()) { + OS << "devnum: "; + S->getDevNumExpr()->printPretty(OS, nullptr, Policy); + OS << " : "; + } + + if (S->hasQueuesTag()) + OS << "queues: "; + + llvm::interleaveComma(S->getQueueIdExprs(), OS, [&](const Expr *E) { + E->printPretty(OS, nullptr, Policy); + }); + + OS << ")"; + } + + if (!S->clauses().empty()) { + OS << ' '; + OpenACCClausePrinter Printer(OS, Policy); + Printer.VisitClauseList(S->clauses()); + } + OS << '\n'; +} + //===----------------------------------------------------------------------===// // Expr printing methods. //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 1fb2387..fccd97d 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2743,6 +2743,14 @@ void StmtProfiler::VisitOpenACCHostDataConstruct( P.VisitOpenACCClauseList(S->clauses()); } +void StmtProfiler::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) { + // VisitStmt covers 'children', so the exprs inside of it are covered. + VisitStmt(S); + + OpenACCClauseProfiler P{*this}; + P.VisitOpenACCClauseList(S->clauses()); +} + void StmtProfiler::VisitHLSLOutArgExpr(const HLSLOutArgExpr *S) { VisitStmt(S); } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp index b5af10d..7cdffbe 100644 --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -2960,6 +2960,10 @@ void TextNodeDumper::VisitOpenACCHostDataConstruct( OS << " " << S->getDirectiveKind(); } +void TextNodeDumper::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) { + OS << " " << S->getDirectiveKind(); +} + void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) { AddChild("begin", [=] { OS << S->getStartingElementPos(); }); AddChild("number of elements", [=] { OS << S->getDataElementCount(); }); diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6c7a594..6c604f4 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -470,6 +470,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { case Stmt::OpenACCHostDataConstructClass: EmitOpenACCHostDataConstruct(cast<OpenACCHostDataConstruct>(*S)); break; + case Stmt::OpenACCWaitConstructClass: + EmitOpenACCWaitConstruct(cast<OpenACCWaitConstruct>(*S)); + break; } } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 092d553..847999c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4118,6 +4118,11 @@ public: EmitStmt(S.getStructuredBlock()); } + void EmitOpenACCWaitConstruct(const OpenACCWaitConstruct &S) { + // TODO OpenACC: Implement this. It is currently implemented as a 'no-op', + // but in the future we will implement some sort of IR. + } + //===--------------------------------------------------------------------===// // LValue Expression Emission //===--------------------------------------------------------------------===// diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 7248abe..cda218e 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -1146,12 +1146,15 @@ struct CounterCoverageMappingBuilder /// Create a Branch Region around a SwitchCase for code coverage /// and add it to the function's SourceRegions. - void createSwitchCaseRegion(const SwitchCase *SC, Counter TrueCnt) { + /// Returns Counter that corresponds to SC. + Counter createSwitchCaseRegion(const SwitchCase *SC, Counter ParentCount) { // Push region onto RegionStack but immediately pop it (which adds it to // the function's SourceRegions) because it doesn't apply to any other // source other than the SwitchCase. + Counter TrueCnt = getRegionCounter(SC); popRegions(pushRegion(TrueCnt, getStart(SC), SC->getColonLoc(), - Counter::getZero())); + subtractCounters(ParentCount, TrueCnt))); + return TrueCnt; } /// Check whether a region with bounds \c StartLoc and \c EndLoc @@ -1863,16 +1866,22 @@ struct CounterCoverageMappingBuilder const SwitchCase *Case = S->getSwitchCaseList(); for (; Case; Case = Case->getNextSwitchCase()) { HasDefaultCase = HasDefaultCase || isa<DefaultStmt>(Case); - auto CaseCount = getRegionCounter(Case); + auto CaseCount = createSwitchCaseRegion(Case, ParentCount); CaseCountSum = addCounters(CaseCountSum, CaseCount, /*Simplify=*/false); - createSwitchCaseRegion(Case, CaseCount); } // If no explicit default case exists, create a branch region to represent // the hidden branch, which will be added later by the CodeGen. This region // will be associated with the switch statement's condition. if (!HasDefaultCase) { - Counter DefaultCount = subtractCounters(ParentCount, CaseCountSum); - createBranchRegion(S->getCond(), Counter::getZero(), DefaultCount); + // Simplify is skipped while building the counters above: it can get + // really slow on top of switches with thousands of cases. Instead, + // trigger simplification by adding zero to the last counter. + CaseCountSum = + addCounters(CaseCountSum, Counter::getZero(), /*Simplify=*/true); + + // This is considered as the False count on SwitchStmt. + Counter SwitchFalse = subtractCounters(ParentCount, CaseCountSum); + createBranchRegion(S->getCond(), CaseCountSum, SwitchFalse); } } diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index 9ffdc04..554b55f 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -461,9 +461,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { getColumnLimit(State) || CurrentState.BreakBeforeParameter) && (!Current.isTrailingComment() || Current.NewlinesBefore > 0) && - (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || - Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || - Style.ColumnLimit != 0)) { + (Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || + Style.ColumnLimit > 0 || Current.NewlinesBefore > 0)) { return true; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 05c86db..6a8caa2 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -492,7 +492,8 @@ private: ProbablyFunctionType && CurrentToken->Next && (CurrentToken->Next->is(tok::l_paren) || (CurrentToken->Next->is(tok::l_square) && - Line.MustBeDeclaration))) { + (Line.MustBeDeclaration || + PrevNonComment->isTypeName(LangOpts))))) { OpeningParen.setType(OpeningParen.Next->is(tok::caret) ? TT_ObjCBlockLParen : TT_FunctionTypeLParen); @@ -2403,7 +2404,8 @@ private: // not auto operator->() -> xxx; Current.setType(TT_TrailingReturnArrow); } else if (Current.is(tok::arrow) && Current.Previous && - Current.Previous->is(tok::r_brace)) { + Current.Previous->is(tok::r_brace) && + Current.Previous->is(BK_Block)) { // Concept implicit conversion constraint needs to be treated like // a trailing return type ... } -> <type>. Current.setType(TT_TrailingReturnArrow); diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index de7e261..654148a 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -570,8 +570,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, Keywords.kw_as)); ProbablyBracedList = - ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() || - NextTok->is(tok::l_paren))); + ProbablyBracedList || + (IsCpp && (PrevTok->Tok.isLiteral() || + NextTok->isOneOf(tok::l_paren, tok::arrow))); // If there is a comma, semicolon or right paren after the closing // brace, we assume this is a braced initializer list. diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index af175a4..5da34a2 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -573,6 +573,7 @@ bool doesDirectiveHaveAssociatedStmt(OpenACCDirectiveKind DirKind) { default: case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: + case OpenACCDirectiveKind::Wait: return false; case OpenACCDirectiveKind::Parallel: case OpenACCDirectiveKind::Serial: @@ -604,6 +605,7 @@ unsigned getOpenACCScopeFlags(OpenACCDirectiveKind DirKind) { case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: case OpenACCDirectiveKind::HostData: + case OpenACCDirectiveKind::Wait: return 0; case OpenACCDirectiveKind::Invalid: llvm_unreachable("Shouldn't be creating a scope for an invalid construct"); @@ -1288,7 +1290,8 @@ Parser::ParseOpenACCWaitArgument(SourceLocation Loc, bool IsDirective) { return Result; } - Result.QueueIdExprs.push_back(Res.first.get()); + if (Res.first.isUsable()) + Result.QueueIdExprs.push_back(Res.first.get()); } return Result; @@ -1422,6 +1425,7 @@ Parser::ParseOpenACCDirective() { SourceLocation StartLoc = ConsumeAnnotationToken(); SourceLocation DirLoc = getCurToken().getLocation(); OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this); + Parser::OpenACCWaitParseInfo WaitInfo; getActions().OpenACC().ActOnConstruct(DirKind, DirLoc); @@ -1462,7 +1466,8 @@ Parser::ParseOpenACCDirective() { break; case OpenACCDirectiveKind::Wait: // OpenACC has an optional paren-wrapped 'wait-argument'. - if (ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true).Failed) + WaitInfo = ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true); + if (WaitInfo.Failed) T.skipToEnd(); else T.consumeClose(); @@ -1476,8 +1481,14 @@ Parser::ParseOpenACCDirective() { } // Parses the list of clauses, if present, plus set up return value. - OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, DirLoc, - SourceLocation{}, + OpenACCDirectiveParseInfo ParseInfo{DirKind, + StartLoc, + DirLoc, + T.getOpenLocation(), + T.getCloseLocation(), + /*EndLoc=*/SourceLocation{}, + WaitInfo.QueuesLoc, + WaitInfo.getAllExprs(), ParseOpenACCClauseList(DirKind)}; assert(Tok.is(tok::annot_pragma_openacc_end) && @@ -1529,6 +1540,7 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { } return getActions().OpenACC().ActOnEndStmtDirective( - DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.EndLoc, + DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.LParenLoc, + DirInfo.MiscLoc, DirInfo.Exprs, DirInfo.RParenLoc, DirInfo.EndLoc, DirInfo.Clauses, AssocStmt); } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 2be6af2..505cc5e 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1398,6 +1398,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Expr::HLSLOutArgExprClass: case Stmt::OpenACCEnterDataConstructClass: case Stmt::OpenACCExitDataConstructClass: + case Stmt::OpenACCWaitConstructClass: // These expressions can never throw. return CT_Cannot; diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index 11c1835..aa9097b 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -41,6 +41,7 @@ bool diagnoseConstructAppertainment(SemaOpenACC &S, OpenACCDirectiveKind K, case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: case OpenACCDirectiveKind::HostData: + case OpenACCDirectiveKind::Wait: if (!IsStmt) return S.Diag(StartLoc, diag::err_acc_construct_appertainment) << K; break; @@ -566,6 +567,16 @@ bool checkValidAfterDeviceType( return true; } +// A temporary function that helps implement the 'not implemented' check at the +// top of each clause checking function. This should only be used in conjunction +// with the one being currently implemented/only updated after the entire +// construct has been implemented. +bool isDirectiveKindImplemented(OpenACCDirectiveKind DK) { + return isOpenACCComputeDirectiveKind(DK) || + isOpenACCCombinedDirectiveKind(DK) || isOpenACCDataDirectiveKind(DK) || + DK == OpenACCDirectiveKind::Loop || DK == OpenACCDirectiveKind::Wait; +} + class SemaOpenACCClauseVisitor { SemaOpenACC &SemaRef; ASTContext &Ctx; @@ -680,9 +691,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitIfClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // constructs that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // There is no prose in the standard that says duplicates aren't allowed, @@ -717,8 +726,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitSelfClause( // Restrictions only properly implemented on 'compute' constructs, and // 'compute' constructs are the only construct that can do anything with // this yet, so skip/treat as unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // TODO OpenACC: When we implement this for 'update', this takes a @@ -915,9 +923,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitAsyncClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // There is no prose in the standard that says duplicates aren't allowed, @@ -973,9 +979,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though @@ -992,9 +996,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though @@ -1011,9 +1013,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyInClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though @@ -1030,9 +1030,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyOutClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, so there // really isn't anything to do here. GCC does some duplicate-finding, though @@ -1109,9 +1107,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDevicePtrClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // ActOnVar ensured that everything is a valid variable reference, but we @@ -1134,9 +1130,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWaitClause( // constructs, and 'compute'/'combined'/'data' constructs are the only // construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) && - !isOpenACCDataDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); return OpenACCWaitClause::Create( @@ -1150,10 +1144,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceTypeClause( // 'loop' constructs, and 'compute'/'combined'/'data'/'loop' constructs are // the only construct that can do anything with this yet, so skip/treat as // unimplemented in this case. - if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop && - Clause.getDirectiveKind() != OpenACCDirectiveKind::Data && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // TODO OpenACC: Once we get enough of the CodeGen implemented that we have @@ -1347,8 +1338,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorClause( // Restrictions only properly implemented on 'loop'/'combined' constructs, and // it is the only construct that can do anything with this, so skip/treat as // unimplemented for the routine constructs. - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); Expr *IntExpr = @@ -1446,8 +1436,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWorkerClause( // Restrictions only properly implemented on 'loop'/'combined' constructs, and // it is the only construct that can do anything with this, so skip/treat as // unimplemented for the routine constructs. - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); Expr *IntExpr = @@ -1559,8 +1548,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause( // Restrictions only properly implemented on 'loop' constructs, and it is // the only construct that can do anything with this, so skip/treat as // unimplemented for the combined constructs. - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop && - !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind())) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // OpenACC 3.3 Section 2.9.11: A reduction clause may not appear on a loop @@ -1691,7 +1679,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitFinalizeClause( OpenACCClause *SemaOpenACCClauseVisitor::VisitIfPresentClause( SemaOpenACC::OpenACCParsedClause &Clause) { - if (Clause.getDirectiveKind() != OpenACCDirectiveKind::HostData) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // There isn't anything to do here, this is only valid on one construct, and // has no associated rules. @@ -1704,7 +1692,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitSeqClause( // Restrictions only properly implemented on 'loop' constructs and combined , // and it is the only construct that can do anything with this, so skip/treat // as unimplemented for the routine constructs. - if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Routine) + if (!isDirectiveKindImplemented(Clause.getDirectiveKind())) return isNotImplemented(); // OpenACC 3.3 2.9: @@ -1879,6 +1867,7 @@ bool PreserveLoopRAIIDepthInAssociatedStmtRAII(OpenACCDirectiveKind DK) { return true; case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: + case OpenACCDirectiveKind::Wait: llvm_unreachable("Doesn't have an associated stmt"); default: case OpenACCDirectiveKind::Invalid: @@ -2308,6 +2297,10 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, // Nothing to do here, there is no real legalization that needs to happen // here as these constructs do not take any arguments. break; + case OpenACCDirectiveKind::Wait: + // Nothing really to do here, the arguments to the 'wait' should have + // already been handled by the time we get here. + break; default: Diag(DirLoc, diag::warn_acc_construct_unimplemented) << K; break; @@ -3637,12 +3630,11 @@ bool SemaOpenACC::ActOnStartStmtDirective( return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true); } -StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc, - SourceLocation DirLoc, - SourceLocation EndLoc, - ArrayRef<OpenACCClause *> Clauses, - StmtResult AssocStmt) { +StmtResult SemaOpenACC::ActOnEndStmtDirective( + OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc, + SourceLocation LParenLoc, SourceLocation MiscLoc, ArrayRef<Expr *> Exprs, + SourceLocation RParenLoc, SourceLocation EndLoc, + ArrayRef<OpenACCClause *> Clauses, StmtResult AssocStmt) { switch (K) { default: return StmtEmpty(); @@ -3685,6 +3677,11 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, getASTContext(), StartLoc, DirLoc, EndLoc, Clauses, AssocStmt.isUsable() ? AssocStmt.get() : nullptr); } + case OpenACCDirectiveKind::Wait: { + return OpenACCWaitConstruct::Create( + getASTContext(), StartLoc, DirLoc, LParenLoc, Exprs.front(), MiscLoc, + Exprs.drop_front(), RParenLoc, EndLoc, Clauses); + } } llvm_unreachable("Unhandled case in directive handling?"); } @@ -3697,6 +3694,7 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt( llvm_unreachable("Unimplemented associated statement application"); case OpenACCDirectiveKind::EnterData: case OpenACCDirectiveKind::ExitData: + case OpenACCDirectiveKind::Wait: llvm_unreachable( "these don't have associated statements, so shouldn't get here"); case OpenACCDirectiveKind::Parallel: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 04167e7..c097465 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -4087,8 +4087,9 @@ public: SourceLocation EndLoc, ArrayRef<OpenACCClause *> Clauses, StmtResult StrBlock) { - return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc, - EndLoc, Clauses, StrBlock); + return getSema().OpenACC().ActOnEndStmtDirective( + K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {}, + SourceLocation{}, EndLoc, Clauses, StrBlock); } StmtResult RebuildOpenACCLoopConstruct(SourceLocation BeginLoc, @@ -4097,7 +4098,8 @@ public: ArrayRef<OpenACCClause *> Clauses, StmtResult Loop) { return getSema().OpenACC().ActOnEndStmtDirective( - OpenACCDirectiveKind::Loop, BeginLoc, DirLoc, EndLoc, Clauses, Loop); + OpenACCDirectiveKind::Loop, BeginLoc, DirLoc, SourceLocation{}, + SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, Loop); } StmtResult RebuildOpenACCCombinedConstruct(OpenACCDirectiveKind K, @@ -4106,8 +4108,9 @@ public: SourceLocation EndLoc, ArrayRef<OpenACCClause *> Clauses, StmtResult Loop) { - return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc, - EndLoc, Clauses, Loop); + return getSema().OpenACC().ActOnEndStmtDirective( + K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {}, + SourceLocation{}, EndLoc, Clauses, Loop); } StmtResult RebuildOpenACCDataConstruct(SourceLocation BeginLoc, @@ -4115,9 +4118,9 @@ public: SourceLocation EndLoc, ArrayRef<OpenACCClause *> Clauses, StmtResult StrBlock) { - return getSema().OpenACC().ActOnEndStmtDirective(OpenACCDirectiveKind::Data, - BeginLoc, DirLoc, EndLoc, - Clauses, StrBlock); + return getSema().OpenACC().ActOnEndStmtDirective( + OpenACCDirectiveKind::Data, BeginLoc, DirLoc, SourceLocation{}, + SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock); } StmtResult @@ -4125,7 +4128,8 @@ public: SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef<OpenACCClause *> Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( - OpenACCDirectiveKind::EnterData, BeginLoc, DirLoc, EndLoc, Clauses, {}); + OpenACCDirectiveKind::EnterData, BeginLoc, DirLoc, SourceLocation{}, + SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); } StmtResult @@ -4133,7 +4137,8 @@ public: SourceLocation DirLoc, SourceLocation EndLoc, ArrayRef<OpenACCClause *> Clauses) { return getSema().OpenACC().ActOnEndStmtDirective( - OpenACCDirectiveKind::ExitData, BeginLoc, DirLoc, EndLoc, Clauses, {}); + OpenACCDirectiveKind::ExitData, BeginLoc, DirLoc, SourceLocation{}, + SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {}); } StmtResult RebuildOpenACCHostDataConstruct(SourceLocation BeginLoc, @@ -4142,8 +4147,21 @@ public: ArrayRef<OpenACCClause *> Clauses, StmtResult StrBlock) { return getSema().OpenACC().ActOnEndStmtDirective( - OpenACCDirectiveKind::HostData, BeginLoc, DirLoc, EndLoc, Clauses, - StrBlock); + OpenACCDirectiveKind::HostData, BeginLoc, DirLoc, SourceLocation{}, + SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock); + } + + StmtResult RebuildOpenACCWaitConstruct( + SourceLocation BeginLoc, SourceLocation DirLoc, SourceLocation LParenLoc, + Expr *DevNumExpr, SourceLocation QueuesLoc, ArrayRef<Expr *> QueueIdExprs, + SourceLocation RParenLoc, SourceLocation EndLoc, + ArrayRef<OpenACCClause *> Clauses) { + llvm::SmallVector<Expr *> Exprs; + Exprs.push_back(DevNumExpr); + Exprs.insert(Exprs.end(), QueueIdExprs.begin(), QueueIdExprs.end()); + return getSema().OpenACC().ActOnEndStmtDirective( + OpenACCDirectiveKind::Wait, BeginLoc, DirLoc, LParenLoc, QueuesLoc, + Exprs, RParenLoc, EndLoc, Clauses, {}); } ExprResult RebuildOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc) { @@ -12331,6 +12349,50 @@ StmtResult TreeTransform<Derived>::TransformOpenACCHostDataConstruct( } template <typename Derived> +StmtResult +TreeTransform<Derived>::TransformOpenACCWaitConstruct(OpenACCWaitConstruct *C) { + getSema().OpenACC().ActOnConstruct(C->getDirectiveKind(), C->getBeginLoc()); + + ExprResult DevNumExpr; + if (C->hasDevNumExpr()) { + DevNumExpr = getDerived().TransformExpr(C->getDevNumExpr()); + + if (DevNumExpr.isUsable()) + DevNumExpr = getSema().OpenACC().ActOnIntExpr( + OpenACCDirectiveKind::Wait, OpenACCClauseKind::Invalid, + C->getBeginLoc(), DevNumExpr.get()); + } + + llvm::SmallVector<Expr *> QueueIdExprs; + + for (Expr *QE : C->getQueueIdExprs()) { + assert(QE && "Null queue id expr?"); + ExprResult NewEQ = getDerived().TransformExpr(QE); + + if (!NewEQ.isUsable()) + break; + NewEQ = getSema().OpenACC().ActOnIntExpr(OpenACCDirectiveKind::Wait, + OpenACCClauseKind::Invalid, + C->getBeginLoc(), NewEQ.get()); + if (NewEQ.isUsable()) + QueueIdExprs.push_back(NewEQ.get()); + } + + llvm::SmallVector<OpenACCClause *> TransformedClauses = + getDerived().TransformOpenACCClauseList(C->getDirectiveKind(), + C->clauses()); + + if (getSema().OpenACC().ActOnStartStmtDirective( + C->getDirectiveKind(), C->getBeginLoc(), TransformedClauses)) + return StmtError(); + + return getDerived().RebuildOpenACCWaitConstruct( + C->getBeginLoc(), C->getDirectiveLoc(), C->getLParenLoc(), + DevNumExpr.isUsable() ? DevNumExpr.get() : nullptr, C->getQueuesLoc(), + QueueIdExprs, C->getRParenLoc(), C->getEndLoc(), TransformedClauses); +} + +template <typename Derived> ExprResult TreeTransform<Derived>::TransformOpenACCAsteriskSizeExpr( OpenACCAsteriskSizeExpr *E) { if (getDerived().AlwaysRebuild()) diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 21ad6c5..8fe0412 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2870,6 +2870,22 @@ void ASTStmtReader::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) { VisitOpenACCAssociatedStmtConstruct(S); } +void ASTStmtReader::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) { + VisitStmt(S); + // Consume the count of Expressions. + (void)Record.readInt(); + VisitOpenACCConstructStmt(S); + S->LParenLoc = Record.readSourceLocation(); + S->RParenLoc = Record.readSourceLocation(); + S->QueuesLoc = Record.readSourceLocation(); + + for (unsigned I = 0; I < S->NumExprs; ++I) { + S->getExprPtr()[I] = cast_if_present<Expr>(Record.readSubStmt()); + assert((I == 0 || S->getExprPtr()[I] != nullptr) && + "Only first expression should be null"); + } +} + //===----------------------------------------------------------------------===// // HLSL Constructs/Directives. //===----------------------------------------------------------------------===// @@ -4365,6 +4381,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = OpenACCHostDataConstruct::CreateEmpty(Context, NumClauses); break; } + case STMT_OPENACC_WAIT_CONSTRUCT: { + unsigned NumExprs = Record[ASTStmtReader::NumStmtFields]; + unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1]; + S = OpenACCWaitConstruct::CreateEmpty(Context, NumExprs, NumClauses); + break; + } case EXPR_REQUIRES: { unsigned numLocalParameters = Record[ASTStmtReader::NumExprFields]; unsigned numRequirement = Record[ASTStmtReader::NumExprFields + 1]; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index e55cbe1..f13443d 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2951,6 +2951,20 @@ void ASTStmtWriter::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) { Code = serialization::STMT_OPENACC_HOST_DATA_CONSTRUCT; } +void ASTStmtWriter::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) { + VisitStmt(S); + Record.push_back(S->getExprs().size()); + VisitOpenACCConstructStmt(S); + Record.AddSourceLocation(S->LParenLoc); + Record.AddSourceLocation(S->RParenLoc); + Record.AddSourceLocation(S->QueuesLoc); + + for(Expr *E : S->getExprs()) + Record.AddStmt(E); + + Code = serialization::STMT_OPENACC_WAIT_CONSTRUCT; +} + //===----------------------------------------------------------------------===// // HLSL Constructs/Directives. //===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index ae43c595..0a74a80 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1829,6 +1829,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::OpenACCEnterDataConstructClass: case Stmt::OpenACCExitDataConstructClass: case Stmt::OpenACCHostDataConstructClass: + case Stmt::OpenACCWaitConstructClass: case Stmt::OMPUnrollDirectiveClass: case Stmt::OMPMetaDirectiveClass: case Stmt::HLSLOutArgExprClass: { diff --git a/clang/test/AST/ast-print-openacc-wait-construct.cpp b/clang/test/AST/ast-print-openacc-wait-construct.cpp new file mode 100644 index 0000000..3535459 --- /dev/null +++ b/clang/test/AST/ast-print-openacc-wait-construct.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -fopenacc -ast-print %s -o - | FileCheck %s + +void uses() { + int *iPtr; + int I; + float array[5]; + +// CHECK: #pragma acc wait() if(I == array[I]) +#pragma acc wait() if(I == array[I]) + +// CHECK: #pragma acc wait(*iPtr, I) async +#pragma acc wait(*iPtr, I) async + +// CHECK: #pragma acc wait(queues: *iPtr, I) async(*iPtr) +#pragma acc wait(queues:*iPtr, I) async(*iPtr) + +// CHECK: #pragma acc wait(devnum: I : *iPtr, I) async(I) +#pragma acc wait(devnum:I:*iPtr, I) async(I) + +// CHECK: #pragma acc wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I) +#pragma acc wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I) +} diff --git a/clang/test/CoverageMapping/switch.cpp b/clang/test/CoverageMapping/switch.cpp index a1fee64..db4cddb 100644 --- a/clang/test/CoverageMapping/switch.cpp +++ b/clang/test/CoverageMapping/switch.cpp @@ -2,13 +2,13 @@ // CHECK: foo void foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+11]]:2 = #0 - switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = 0, ((#0 - #2) - #3) + switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#2 + #3), ((#0 - #2) - #3) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+5]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:11 = #2 - return; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0 + return; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:12 -> [[@LINE+1]]:3 = 0 case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #3 - break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0 + break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3) } // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+1]]:3 = #1 int x = 0; // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:2 = #1 @@ -28,14 +28,14 @@ void bar(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+21]]:2 = #0 nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = 0, #2 // CHECK-NEXT: File 0, [[@LINE-1]]:5 -> [[@LINE-1]]:10 = 0 switch (i) // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+11]]:2 = #3 - case 1: // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = 0, (#3 - #5) + case 1: // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = #5, (#3 - #5) // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:10 = #5 - nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #5, 0 + nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #5, (#3 - #5) // CHECK-NEXT: File 0, [[@LINE+1]]:3 -> [[@LINE+7]]:2 = #4 - switch (i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = 0, (#4 - #7) + switch (i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = #7, (#4 - #7) nop(); // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+2]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #7 - nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, 0 + nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, (#4 - #7) } nop(); // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:2 = #6 } @@ -53,35 +53,35 @@ int main() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+39]]:2 = #0 int i = 0; switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+8]]:10 = 0 case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #2 - i = 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0 + i = 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2) break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #3 - i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0 + i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3) break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #4 - break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #4, 0 + break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #4, (#0 - #4) } // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+1]]:3 = #1 switch(i) { // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+27]]:2 = #1 case 0: // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+7]]:10 = 0 i = 1; // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:10 = #6 - break; // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #6, 0 + break; // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #6, (#1 - #6) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:11 -> [[@LINE+1]]:3 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:10 = #7 - i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, 0 + i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, (#1 - #7) default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = (#7 + #8) - break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #8, 0 + break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #8, (#1 - #8) } // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+2]]:3 = #5 // CHECK-NEXT: File 0, [[@LINE+1]]:3 -> [[@LINE+17]]:2 = #5 - switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = 0, ((((#5 - #10) - #11) - #12) - #13) + switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (((#10 + #11) + #12) + #13), ((((#5 - #10) - #11) - #12) - #13) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+8]]:11 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+7]]:11 = #10 - // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #10, 0 + // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #10, (#5 - #10) case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+5]]:11 = (#10 + #11) - i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #11, 0 + i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #11, (#5 - #11) case 3: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:11 = ((#10 + #11) + #12) - // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #12, 0 + // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #12, (#5 - #12) case 4: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:11 = (((#10 + #11) + #12) + #13) - i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #13, 0 + i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #13, (#5 - #13) } foo(1); // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:11 = #9 @@ -95,10 +95,10 @@ int pr44011(int i) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> {{.*}}:2 = #0 switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+6]]:13 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #2 - return 0; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0 + return 0; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:3 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #3 - return 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #3, 0 + return 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #3, (#0 - #3) } } // A region for counter #1 is missing due to the missing return. @@ -106,17 +106,17 @@ int pr44011(int i) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> {{.*}}:2 = #0 // FIXME: End location for "case 1" shouldn't point at the end of the switch. // CHECK: fallthrough int fallthrough(int i) { // CHECK-NEXT: File 0, [[@LINE]]:24 -> [[@LINE+14]]:2 = #0 - // CHECK-NEXT: Branch,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:11 = 0, ((((#0 - #2) - #3) - #4) - #5) + // CHECK-NEXT: Branch,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:11 = (((#2 + #3) + #4) + #5), ((((#0 - #2) - #3) - #4) - #5) switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+10]]:10 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+9]]:10 = #2 - i = 23; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0 + i = 23; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2) case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = (#2 + #3) - i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0 + i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3) break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0 case 3: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+4]]:10 = #4 - // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0 + // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4) case 4: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = (#4 + #5) - i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #5, 0 + i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #5, (#0 - #5) break; } } @@ -126,12 +126,12 @@ void abort(void) __attribute((noreturn)); int noret(int x) { // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE+11]]:2 switch (x) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+8]]:14 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:12 - abort(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, 0 + abort(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, (#0 - #2) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+1]]:3 = 0 case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 - return 5; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0 + return 5; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3) // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:3 = 0 case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:14 - return 10; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0 + return 10; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4) } } diff --git a/clang/test/CoverageMapping/switchmacro.c b/clang/test/CoverageMapping/switchmacro.c index 0696e74..4c98cc7 100644 --- a/clang/test/CoverageMapping/switchmacro.c +++ b/clang/test/CoverageMapping/switchmacro.c @@ -6,7 +6,7 @@ int foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:16 -> {{[0-9]+}}:2 = #0 switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> {{[0-9]+}}:11 = 0 default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> {{[0-9]+}}:11 = #2 - // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, 0 + // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, (#0 - #2) if (i == 1) // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:15 = #2 // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:9 -> [[@LINE-1]]:15 = #3, (#2 - #3) return 0; // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #3 @@ -15,7 +15,7 @@ int foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:16 -> {{[0-9]+}}:2 = #0 // CHECK-NEXT: File 0, [[@LINE+1]]:8 -> {{[0-9]+}}:11 = (#2 - #3) FOO(1); case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:13 = ((#2 + #4) - #3) - // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0 + // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4) return 2; // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+4]]:3 = 0 // CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:3 -> [[@LINE+2]]:6 = 0 diff --git a/clang/test/ParserOpenACC/parse-wait-construct.c b/clang/test/ParserOpenACC/parse-wait-construct.c index 8f7ea8e..17b7ecb 100644 --- a/clang/test/ParserOpenACC/parse-wait-construct.c +++ b/clang/test/ParserOpenACC/parse-wait-construct.c @@ -3,171 +3,135 @@ void func() { int i, j; - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} #pragma acc wait - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait clause-list - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait ( - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} #pragma acc wait () - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait () clause-list - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait (devnum: - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{expected expression}} #pragma acc wait (devnum:) - // expected-error@+3{{expected expression}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected expression}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait (devnum:) clause-list - // expected-error@+4{{expected ':'}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{expected ':'}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait (devnum: i + j - // expected-error@+2{{expected ':'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{expected ':'}} #pragma acc wait (devnum: i + j) - // expected-error@+3{{expected ':'}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected ':'}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait (devnum: i + j) clause-list - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait (queues: - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} #pragma acc wait (queues:) - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait (queues:) clause-list - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait (devnum: i + j:queues: - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} #pragma acc wait (devnum: i + j:queues:) - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait (devnum: i + j:queues:) clause-list - // expected-error@+5{{use of undeclared identifier 'devnum'}} - // expected-error@+4{{expected ','}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+4{{use of undeclared identifier 'devnum'}} + // expected-error@+3{{expected ','}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait (queues:devnum: i + j - // expected-error@+3{{use of undeclared identifier 'devnum'}} - // expected-error@+2{{expected ','}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{use of undeclared identifier 'devnum'}} + // expected-error@+1{{expected ','}} #pragma acc wait (queues:devnum: i + j) - // expected-error@+4{{use of undeclared identifier 'devnum'}} - // expected-error@+3{{expected ','}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{use of undeclared identifier 'devnum'}} + // expected-error@+2{{expected ','}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait (queues:devnum: i + j) clause-list - // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(i, j, 1+1, 3.3 - // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} #pragma acc wait(i, j, 1+1, 3.3) - // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait(i, j, 1+1, 3.3) clause-list - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(, - // expected-error@+2{{expected expression}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{expected expression}} #pragma acc wait(,) - // expected-error@+3{{expected expression}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{expected expression}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait(,) clause-list - // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(queues:i, j, 1+1, 3.3 - // expected-error@+5{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+4{{expected expression}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+3{{expected expression}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(queues:i, j, 1+1, 3.3, - // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} #pragma acc wait(queues:i, j, 1+1, 3.3) - // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait(queues:i, j, 1+1, 3.3) clause-list - // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(devnum:3:i, j, 1+1, 3.3 - // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} #pragma acc wait(devnum:3:i, j, 1+1, 3.3) - // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait(devnum:3:i, j, 1+1, 3.3) clause-list - // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+3{{expected ')'}} - // expected-note@+2{{to match this '('}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+2{{expected ')'}} + // expected-note@+1{{to match this '('}} #pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3 - // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} #pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3) - // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} - // expected-error@+2{{invalid OpenACC clause 'clause'}} - // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} + // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}} + // expected-error@+1{{invalid OpenACC clause 'clause'}} #pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3) clause-list } diff --git a/clang/test/SemaOpenACC/combined-construct-default-clause.c b/clang/test/SemaOpenACC/combined-construct-default-clause.c index 43c2883..2d77faa 100644 --- a/clang/test/SemaOpenACC/combined-construct-default-clause.c +++ b/clang/test/SemaOpenACC/combined-construct-default-clause.c @@ -35,7 +35,6 @@ void SingleOnly() { #pragma acc loop default(none) for(int i = 5; i < 10;++i); - // expected-warning@+2{{OpenACC construct 'wait' not yet implemented}} // expected-error@+1{{OpenACC 'default' clause is not valid on 'wait' directive}} #pragma acc wait default(none) while(0); diff --git a/clang/test/SemaOpenACC/compute-construct-default-clause.c b/clang/test/SemaOpenACC/compute-construct-default-clause.c index dfa5cd3..86a7587 100644 --- a/clang/test/SemaOpenACC/compute-construct-default-clause.c +++ b/clang/test/SemaOpenACC/compute-construct-default-clause.c @@ -35,7 +35,6 @@ void SingleOnly() { #pragma acc loop default(none) for(int i = 5; i < 10;++i); - // expected-warning@+2{{OpenACC construct 'wait' not yet implemented}} // expected-error@+1{{OpenACC 'default' clause is not valid on 'wait' directive}} #pragma acc wait default(none) while(0); diff --git a/clang/test/SemaOpenACC/unimplemented-construct.c b/clang/test/SemaOpenACC/unimplemented-construct.c index 3f4bc37..42737eb 100644 --- a/clang/test/SemaOpenACC/unimplemented-construct.c +++ b/clang/test/SemaOpenACC/unimplemented-construct.c @@ -4,8 +4,8 @@ #pragma acc routine struct S { -// expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}} -#pragma acc wait +// expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}} +#pragma acc set int foo; }; diff --git a/clang/test/SemaOpenACC/wait-construct-ast.cpp b/clang/test/SemaOpenACC/wait-construct-ast.cpp new file mode 100644 index 0000000..58214f1 --- /dev/null +++ b/clang/test/SemaOpenACC/wait-construct-ast.cpp @@ -0,0 +1,225 @@ +// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s + +// Test this with PCH. +// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s +// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s + +#ifndef PCH_HELPER +#define PCH_HELPER + +int some_int(); +long some_long(); + +void NormalFunc() { + // CHECK-LABEL: NormalFunc + // CHECK-NEXT: CompoundStmt + +#pragma acc wait async(some_int()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' +#pragma acc wait() async + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause +#pragma acc wait(some_int(), some_long()) if (some_int() < some_long()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' + // CHECK-NEXT: if clause + // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '<' + // CHECK-NEXT: ImplicitCastExpr{{.*}} 'long' + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' +#pragma acc wait(queues:some_int(), some_long()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' +#pragma acc wait(devnum:some_int() : queues:some_int(), some_long()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' +#pragma acc wait(devnum:some_int() : some_int(), some_long()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' +} + +template<typename T> +void TemplFunc(T t) { + // CHECK-LABEL: FunctionTemplateDecl {{.*}}TemplFunc + // CHECK-NEXT: TemplateTypeParmDecl + // CHECK-NEXT: FunctionDecl{{.*}}TemplFunc + // CHECK-NEXT: ParmVarDecl{{.*}} t 'T' + // CHECK-NEXT: CompoundStmt + +#pragma acc wait async(T::value) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' +#pragma acc wait() async + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause +#pragma acc wait(t, T::value) if (T::value > t) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue ParmVar{{.*}} 't' 'T' + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' + // CHECK-NEXT: if clause + // CHECK-NEXT: BinaryOperator{{.*}} '<dependent type>' '>' + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' + // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue ParmVar{{.*}} 't' 'T' +#pragma acc wait(queues:typename T::IntTy{}, T::value) if (typename T::IntTy{} < typename T::ShortTy{}) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::IntTy' list + // CHECK-NEXT: InitListExpr + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' + // CHECK-NEXT: if clause + // CHECK-NEXT: BinaryOperator{{.*}} '<dependent type>' '<' + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::IntTy' list + // CHECK-NEXT: InitListExpr + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list + // CHECK-NEXT: InitListExpr +#pragma acc wait(devnum:typename T::ShortTy{} : queues:some_int(), T::value) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list + // CHECK-NEXT: InitListExpr + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' +#pragma acc wait(devnum:typename T::ShortTy{} : T::value, some_long()) + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list + // CHECK-NEXT: InitListExpr + // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>' + // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' + + // Instantiation: + // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (HasInt)' implicit_instantiation + // CHECK-NEXT: TemplateArgument type 'HasInt' + // CHECK-NEXT: RecordType{{.*}} 'HasInt' + // CHECK-NEXT: CXXRecord{{.*}} 'HasInt' + // CHECK-NEXT: ParmVarDecl{{.*}} t 'HasInt' + // CHECK-NEXT: CompoundStmt + + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: async clause + // + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: ImplicitCastExpr{{.*}}'char' <UserDefinedConversion> + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}}.operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar{{.*}} 't' 'HasInt' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // CHECK-NEXT: if clause + // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '>' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' <IntegralCast> + // CHECK-NEXT: ImplicitCastExpr{{.*}}'char' <UserDefinedConversion> + // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char' + // CHECK-NEXT: MemberExpr{{.*}}.operator char + // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar{{.*}} 't' 'HasInt' + // + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: <<<NULL>> + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::IntTy':'int' + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::IntTy':'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // CHECK-NEXT: if clause + // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '<' + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::IntTy':'int' + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::IntTy':'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short' + // + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: CallExpr{{.*}}'int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_int' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // + // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait + // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' + // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int' + // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt' + // CHECK-NEXT: CallExpr{{.*}}'long' + // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay> + // CHECK-NEXT: DeclRefExpr{{.*}}'some_long' +} + +struct HasInt { + using IntTy = int; + using ShortTy = short; + static constexpr int value = 1; + + operator char(); +}; +void use() { + TemplFunc(HasInt{}); +} +#endif diff --git a/clang/test/SemaOpenACC/wait-construct.cpp b/clang/test/SemaOpenACC/wait-construct.cpp new file mode 100644 index 0000000..a68fc54 --- /dev/null +++ b/clang/test/SemaOpenACC/wait-construct.cpp @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 %s -fopenacc -verify + +struct NotConvertible{} NC; +short getS(); +int getI(); + +struct AmbiguousConvert{ + operator int(); // #AMBIG_INT + operator short(); // #AMBIG_SHORT + operator float(); +} Ambiguous; + +struct ExplicitConvertOnly { + explicit operator int() const; // #EXPL_CONV +} Explicit; + +void uses() { + int arr[5]; +#pragma acc wait(getS(), getI()) +#pragma acc wait(devnum:getS(): getI()) +#pragma acc wait(devnum:getS(): queues: getI(), getS()) +#pragma acc wait(devnum:getS(): getI(), getS()) + + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} +#pragma acc wait(devnum:NC : 5) + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} +#pragma acc wait(devnum:5 : NC) + // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('int[5]' invalid)}} + // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('int[5]' invalid)}} + // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}} +#pragma acc wait(devnum:arr : queues: arr, NC, 5) + + // expected-error@+3{{multiple conversions from expression type 'struct AmbiguousConvert' to an integral type}} + // expected-note@#AMBIG_INT{{conversion to integral type 'int'}} + // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}} +#pragma acc wait(Ambiguous) + + // expected-error@+2{{OpenACC integer expression type 'struct ExplicitConvertOnly' requires explicit conversion to 'int'}} + // expected-note@#EXPL_CONV{{conversion to integral type 'int'}} +#pragma acc wait(4, Explicit, 5) + + // expected-error@+1{{use of undeclared identifier 'queues'}} +#pragma acc wait(devnum: queues: 5) + +#pragma acc wait async +#pragma acc wait async(getI()) + // expected-error@+1{{OpenACC clause 'async' requires expression of integer type ('struct NotConvertible' invalid)}} +#pragma acc wait async(NC) + +#pragma acc wait if(getI() < getS()) + // expected-error@+1{{value of type 'struct NotConvertible' is not contextually convertible to 'bool'}} +#pragma acc wait if(NC) + +} + +template<typename T> +void TestInst() { + // expected-error@+4{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}} + // expected-note@#INST{{in instantiation of function template specialization}} + // expected-note@#AMBIG_INT{{conversion to integral type 'int'}} + // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}} +#pragma acc wait(devnum:T::value :queues:T::ACValue) + + // expected-error@+5{{OpenACC integer expression type 'const ExplicitConvertOnly' requires explicit conversion to 'int'}} + // expected-note@#EXPL_CONV{{conversion to integral type 'int'}} + // expected-error@+3{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}} + // expected-note@#AMBIG_INT{{conversion to integral type 'int'}} + // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}} +#pragma acc wait(devnum:T::EXValue :queues:T::ACValue) + + // expected-error@+1{{no member named 'Invalid' in 'HasInt'}} +#pragma acc wait(queues: T::Invalid, T::Invalid2) + + // expected-error@+3{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}} + // expected-note@#AMBIG_INT{{conversion to integral type 'int'}} + // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}} +#pragma acc wait async(T::ACValue) + +#pragma acc wait if(T::value < T{}) + // expected-error@+1{{value of type 'const ExplicitConvertOnly' is not contextually convertible to 'bool'}} +#pragma acc wait if(T::EXValue) +} + +struct HasInt { + using IntTy = int; + using ShortTy = short; + static constexpr int value = 1; + static constexpr AmbiguousConvert ACValue; + static constexpr ExplicitConvertOnly EXValue; + + operator char(); +}; +void Inst() { + TestInst<HasInt>(); // #INST +} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 7015821..9bdc4c9 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2189,6 +2189,7 @@ public: void VisitOpenACCEnterDataConstruct(const OpenACCEnterDataConstruct *D); void VisitOpenACCExitDataConstruct(const OpenACCExitDataConstruct *D); void VisitOpenACCHostDataConstruct(const OpenACCHostDataConstruct *D); + void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *D); void VisitOMPExecutableDirective(const OMPExecutableDirective *D); void VisitOMPLoopBasedDirective(const OMPLoopBasedDirective *D); void VisitOMPLoopDirective(const OMPLoopDirective *D); @@ -3632,6 +3633,12 @@ void EnqueueVisitor::VisitOpenACCHostDataConstruct( EnqueueChildren(Clause); } +void EnqueueVisitor::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *C) { + EnqueueChildren(C); + for (auto *Clause : C->clauses()) + EnqueueChildren(Clause); +} + void EnqueueVisitor::VisitAnnotateAttr(const AnnotateAttr *A) { EnqueueChildren(A); } @@ -6394,6 +6401,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("OpenACCExitDataConstruct"); case CXCursor_OpenACCHostDataConstruct: return cxstring::createRef("OpenACCHostDataConstruct"); + case CXCursor_OpenACCWaitConstruct: + return cxstring::createRef("OpenACCWaitConstruct"); } llvm_unreachable("Unhandled CXCursorKind"); diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index 26935c4..b9fd3b4 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -900,6 +900,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, case Stmt::OpenACCHostDataConstructClass: K = CXCursor_OpenACCHostDataConstruct; break; + case Stmt::OpenACCWaitConstructClass: + K = CXCursor_OpenACCWaitConstruct; + break; case Stmt::OMPTargetParallelGenericLoopDirectiveClass: K = CXCursor_OMPTargetParallelGenericLoopDirective; break; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index eeb857a..e892f10 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -14503,10 +14503,13 @@ TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) { FormatStyle DoNotMergeNoColumnLimit = NoColumnLimit; DoNotMergeNoColumnLimit.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_None; - verifyFormat("A()\n" - " : b(0) {\n" + verifyFormat("A() : b(0) {\n" "}", - "A():b(0){}", DoNotMergeNoColumnLimit); + DoNotMergeNoColumnLimit); + verifyNoChange("A()\n" + " : b(0) {\n" + "}", + DoNotMergeNoColumnLimit); verifyFormat("A()\n" " : b(0) {\n" "}", diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 38658fc..b2fb522 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2277,6 +2277,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) { ASSERT_EQ(Tokens.size(), 21u) << Tokens; EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown); + Tokens = annotate("Class<Type>{foo}->func(arg);"); + ASSERT_EQ(Tokens.size(), 14u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_Unknown); // Not FunctionLBrace + EXPECT_BRACE_KIND(Tokens[4], BK_BracedInit); + EXPECT_BRACE_KIND(Tokens[6], BK_BracedInit); + EXPECT_TOKEN(Tokens[7], tok::arrow, TT_Unknown); + auto Style = getLLVMStyle(); Style.StatementAttributeLikeMacros.push_back("emit"); Tokens = annotate("emit foo()->bar;", Style); diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index bc0020e..6899505 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -392,6 +392,10 @@ struct IntrinsicLibrary { fir::ExtendedValue genSum(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); void genSignalSubroutine(llvm::ArrayRef<fir::ExtendedValue>); void genSleep(llvm::ArrayRef<fir::ExtendedValue>); + void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>); + mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>); fir::ExtendedValue genSystem(std::optional<mlir::Type>, mlir::ArrayRef<fir::ExtendedValue> args); void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>); @@ -401,6 +405,9 @@ struct IntrinsicLibrary { llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genTranspose(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); + void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>); + void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue>); + void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 84fc77a..bbbacd2 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -646,6 +646,10 @@ static constexpr IntrinsicHandler handlers[]{ {"dim", asValue}, {"mask", asBox, handleDynamicOptional}}}, /*isElemental=*/false}, + {"syncthreads", &I::genSyncThreads, {}, /*isElemental=*/false}, + {"syncthreads_and", &I::genSyncThreadsAnd, {}, /*isElemental=*/false}, + {"syncthreads_count", &I::genSyncThreadsCount, {}, /*isElemental=*/false}, + {"syncthreads_or", &I::genSyncThreadsOr, {}, /*isElemental=*/false}, {"system", &I::genSystem, {{{"command", asBox}, {"exitstat", asBox, handleDynamicOptional}}}, @@ -655,6 +659,9 @@ static constexpr IntrinsicHandler handlers[]{ {{{"count", asAddr}, {"count_rate", asAddr}, {"count_max", asAddr}}}, /*isElemental=*/false}, {"tand", &I::genTand}, + {"threadfence", &I::genThreadFence, {}, /*isElemental=*/false}, + {"threadfence_block", &I::genThreadFenceBlock, {}, /*isElemental=*/false}, + {"threadfence_system", &I::genThreadFenceSystem, {}, /*isElemental=*/false}, {"trailz", &I::genTrailz}, {"transfer", &I::genTransfer, @@ -7437,6 +7444,52 @@ IntrinsicLibrary::genSum(mlir::Type resultType, resultType, args); } +// SYNCTHREADS +void IntrinsicLibrary::genSyncThreads(llvm::ArrayRef<fir::ExtendedValue> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0"; + mlir::FunctionType funcType = + mlir::FunctionType::get(builder.getContext(), {}, {}); + auto funcOp = builder.createFunction(loc, funcName, funcType); + llvm::SmallVector<mlir::Value> noArgs; + builder.create<fir::CallOp>(loc, funcOp, noArgs); +} + +// SYNCTHREADS_AND +mlir::Value +IntrinsicLibrary::genSyncThreadsAnd(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.and"; + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + auto funcOp = builder.createFunction(loc, funcName, ftype); + return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0); +} + +// SYNCTHREADS_COUNT +mlir::Value +IntrinsicLibrary::genSyncThreadsCount(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.popc"; + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + auto funcOp = builder.createFunction(loc, funcName, ftype); + return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0); +} + +// SYNCTHREADS_OR +mlir::Value +IntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.or"; + mlir::MLIRContext *context = builder.getContext(); + mlir::FunctionType ftype = + mlir::FunctionType::get(context, {resultType}, {args[0].getType()}); + auto funcOp = builder.createFunction(loc, funcName, ftype); + return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0); +} + // SYSTEM fir::ExtendedValue IntrinsicLibrary::genSystem(std::optional<mlir::Type> resultType, @@ -7567,6 +7620,38 @@ IntrinsicLibrary::genTranspose(mlir::Type resultType, return readAndAddCleanUp(resultMutableBox, resultType, "TRANSPOSE"); } +// THREADFENCE +void IntrinsicLibrary::genThreadFence(llvm::ArrayRef<fir::ExtendedValue> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.gl"; + mlir::FunctionType funcType = + mlir::FunctionType::get(builder.getContext(), {}, {}); + auto funcOp = builder.createFunction(loc, funcName, funcType); + llvm::SmallVector<mlir::Value> noArgs; + builder.create<fir::CallOp>(loc, funcOp, noArgs); +} + +// THREADFENCE_BLOCK +void IntrinsicLibrary::genThreadFenceBlock( + llvm::ArrayRef<fir::ExtendedValue> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.cta"; + mlir::FunctionType funcType = + mlir::FunctionType::get(builder.getContext(), {}, {}); + auto funcOp = builder.createFunction(loc, funcName, funcType); + llvm::SmallVector<mlir::Value> noArgs; + builder.create<fir::CallOp>(loc, funcOp, noArgs); +} + +// THREADFENCE_SYSTEM +void IntrinsicLibrary::genThreadFenceSystem( + llvm::ArrayRef<fir::ExtendedValue> args) { + constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.sys"; + mlir::FunctionType funcType = + mlir::FunctionType::get(builder.getContext(), {}, {}); + auto funcOp = builder.createFunction(loc, funcName, funcType); + llvm::SmallVector<mlir::Value> noArgs; + builder.create<fir::CallOp>(loc, funcOp, noArgs); +} + // TRIM fir::ExtendedValue IntrinsicLibrary::genTrim(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 0224ecf..1402bd4 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -18,27 +18,27 @@ implicit none ! Synchronization Functions interface - attributes(device) subroutine syncthreads() bind(c, name='__syncthreads') + attributes(device) subroutine syncthreads() end subroutine end interface public :: syncthreads interface - attributes(device) integer function syncthreads_and(value) bind(c, name='__syncthreads_and') + attributes(device) integer function syncthreads_and(value) integer :: value end function end interface public :: syncthreads_and interface - attributes(device) integer function syncthreads_count(value) bind(c, name='__syncthreads_count') + attributes(device) integer function syncthreads_count(value) integer :: value end function end interface public :: syncthreads_count interface - attributes(device) integer function syncthreads_or(value) bind(c, name='__syncthreads_or') + attributes(device) integer function syncthreads_or(value) integer :: value end function end interface @@ -54,19 +54,19 @@ implicit none ! Memory Fences interface - attributes(device) subroutine threadfence() bind(c, name='__threadfence') + attributes(device) subroutine threadfence() end subroutine end interface public :: threadfence interface - attributes(device) subroutine threadfence_block() bind(c, name='__threadfence_block') + attributes(device) subroutine threadfence_block() end subroutine end interface public :: threadfence_block interface - attributes(device) subroutine threadfence_system() bind(c, name='__threadfence_system') + attributes(device) subroutine threadfence_system() end subroutine end interface public :: threadfence_system diff --git a/flang/runtime/findloc.cpp b/flang/runtime/findloc.cpp index d05e9c5..b9b1d7f 100644 --- a/flang/runtime/findloc.cpp +++ b/flang/runtime/findloc.cpp @@ -159,14 +159,9 @@ struct NumericFindlocHelper { Terminator &terminator) const { switch (targetCat) { case TypeCategory::Integer: - ApplyIntegerKind< - HELPER<CAT, KIND, TypeCategory::Integer>::template Functor, void>( - targetKind, terminator, result, x, target, kind, dim, mask, back, - terminator); - break; case TypeCategory::Unsigned: ApplyIntegerKind< - HELPER<CAT, KIND, TypeCategory::Unsigned>::template Functor, void>( + HELPER<CAT, KIND, TypeCategory::Integer>::template Functor, void>( targetKind, terminator, result, x, target, kind, dim, mask, back, terminator); break; @@ -235,13 +230,8 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x, RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value()); switch (xType->first) { case TypeCategory::Integer: - ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer, - TotalNumericFindlocHelper>::template Functor, - void>(xType->second, terminator, targetType->first, targetType->second, - result, x, target, kind, 0, mask, back, terminator); - break; case TypeCategory::Unsigned: - ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Unsigned, + ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer, TotalNumericFindlocHelper>::template Functor, void>(xType->second, terminator, targetType->first, targetType->second, result, x, target, kind, 0, mask, back, terminator); @@ -331,13 +321,8 @@ void RTDEF(FindlocDim)(Descriptor &result, const Descriptor &x, RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value()); switch (xType->first) { case TypeCategory::Integer: - ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer, - PartialNumericFindlocHelper>::template Functor, - void>(xType->second, terminator, targetType->first, targetType->second, - result, x, target, kind, dim, mask, back, terminator); - break; case TypeCategory::Unsigned: - ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Unsigned, + ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer, PartialNumericFindlocHelper>::template Functor, void>(xType->second, terminator, targetType->first, targetType->second, result, x, target, kind, dim, mask, back, terminator); diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 1331b644..2042bbb 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -17,20 +17,20 @@ attributes(global) subroutine devsub() end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} -! CHECK: fir.call @__syncthreads() +! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> () ! CHECK: fir.call @__syncwarp(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> () -! CHECK: fir.call @__threadfence() -! CHECK: fir.call @__threadfence_block() -! CHECK: fir.call @__threadfence_system() -! CHECK: %{{.*}} = fir.call @__syncthreads_and(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32 -! CHECK: %{{.*}} = fir.call @__syncthreads_count(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32 -! CHECK: %{{.*}} = fir.call @__syncthreads_or(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32 +! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> () +! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> () +! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> () +! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1_i32_0) fastmath<contract> : (i32) -> i32 +! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1_i32_1) fastmath<contract> : (i32) -> i32 +! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1_i32_2) fastmath<contract> : (i32) -> i32 -! CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads", fir.proc_attrs = #fir.proc_attrs<bind_c>} +! CHECK: func.func private @llvm.nvvm.barrier0() ! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence_block", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence_system", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__syncthreads_and(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_and", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__syncthreads_count(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_count", fir.proc_attrs = #fir.proc_attrs<bind_c>} -! CHECK: func.func private @__syncthreads_or(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_or", fir.proc_attrs = #fir.proc_attrs<bind_c>} +! CHECK: func.func private @llvm.nvvm.membar.gl() +! CHECK: func.func private @llvm.nvvm.membar.cta() +! CHECK: func.func private @llvm.nvvm.membar.sys() +! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32 +! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32 +! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32 diff --git a/libcxx/include/__algorithm/adjacent_find.h b/libcxx/include/__algorithm/adjacent_find.h index 7672890..2508250 100644 --- a/libcxx/include/__algorithm/adjacent_find.h +++ b/libcxx/include/__algorithm/adjacent_find.h @@ -11,10 +11,8 @@ #define _LIBCPP___ALGORITHM_ADJACENT_FIND_H #include <__algorithm/comp.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__iterator/iterator_traits.h> #include <__type_traits/invoke.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h index 79a5ec0..4940059 100644 --- a/libcxx/include/__algorithm/binary_search.h +++ b/libcxx/include/__algorithm/binary_search.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/lower_bound.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index cd8b067..a276bb9 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -20,7 +20,6 @@ #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> -#include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_volatile.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h index 4e74ad2..ff6f4f2 100644 --- a/libcxx/include/__algorithm/equal_range.h +++ b/libcxx/include/__algorithm/equal_range.h @@ -17,10 +17,6 @@ #include <__algorithm/upper_bound.h> #include <__config> #include <__functional/identity.h> -#include <__iterator/advance.h> -#include <__iterator/distance.h> -#include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__type_traits/is_constructible.h> diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h index f29633f..5069a72 100644 --- a/libcxx/include/__algorithm/fill_n.h +++ b/libcxx/include/__algorithm/fill_n.h @@ -12,7 +12,6 @@ #include <__algorithm/min.h> #include <__config> #include <__fwd/bit_reference.h> -#include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> #include <__utility/convert_to_integral.h> diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h index 14b8a78..a05d507 100644 --- a/libcxx/include/__algorithm/find.h +++ b/libcxx/include/__algorithm/find.h @@ -24,7 +24,6 @@ #include <__type_traits/invoke.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> -#include <__type_traits/is_same.h> #include <__type_traits/is_signed.h> #include <__utility/move.h> #include <limits> diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h index fc876d8..86b4a3e 100644 --- a/libcxx/include/__algorithm/find_end.h +++ b/libcxx/include/__algorithm/find_end.h @@ -15,7 +15,6 @@ #include <__config> #include <__functional/identity.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__type_traits/invoke.h> #include <__utility/pair.h> diff --git a/libcxx/include/__algorithm/find_first_of.h b/libcxx/include/__algorithm/find_first_of.h index 4a240f7..45ec133 100644 --- a/libcxx/include/__algorithm/find_first_of.h +++ b/libcxx/include/__algorithm/find_first_of.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index 259e527..e08f583 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -14,7 +14,6 @@ #include <__config> #include <__iterator/segmented_iterator.h> #include <__ranges/movable_box.h> -#include <__type_traits/enable_if.h> #include <__utility/in_place.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h index a86102f..bc6c657 100644 --- a/libcxx/include/__algorithm/includes.h +++ b/libcxx/include/__algorithm/includes.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__config> #include <__functional/identity.h> -#include <__iterator/iterator_traits.h> #include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__utility/move.h> diff --git a/libcxx/include/__algorithm/inplace_merge.h b/libcxx/include/__algorithm/inplace_merge.h index e0ccdab..69213cc 100644 --- a/libcxx/include/__algorithm/inplace_merge.h +++ b/libcxx/include/__algorithm/inplace_merge.h @@ -20,8 +20,6 @@ #include <__config> #include <__cstddef/ptrdiff_t.h> #include <__functional/identity.h> -#include <__iterator/advance.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/reverse_iterator.h> #include <__memory/destruct_n.h> diff --git a/libcxx/include/__algorithm/is_heap.h b/libcxx/include/__algorithm/is_heap.h index fa668c1..dfe0620 100644 --- a/libcxx/include/__algorithm/is_heap.h +++ b/libcxx/include/__algorithm/is_heap.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/is_heap_until.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/is_permutation.h b/libcxx/include/__algorithm/is_permutation.h index 8753e9f..1afb115 100644 --- a/libcxx/include/__algorithm/is_permutation.h +++ b/libcxx/include/__algorithm/is_permutation.h @@ -17,7 +17,6 @@ #include <__iterator/concepts.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> diff --git a/libcxx/include/__algorithm/is_sorted.h b/libcxx/include/__algorithm/is_sorted.h index ff61a73..196ae0b 100644 --- a/libcxx/include/__algorithm/is_sorted.h +++ b/libcxx/include/__algorithm/is_sorted.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/is_sorted_until.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/is_sorted_until.h b/libcxx/include/__algorithm/is_sorted_until.h index b64fb65..6066419 100644 --- a/libcxx/include/__algorithm/is_sorted_until.h +++ b/libcxx/include/__algorithm/is_sorted_until.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h index 2144ba9..4fba674 100644 --- a/libcxx/include/__algorithm/lower_bound.h +++ b/libcxx/include/__algorithm/lower_bound.h @@ -19,7 +19,6 @@ #include <__iterator/iterator_traits.h> #include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> -#include <__type_traits/remove_reference.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/make_projected.h b/libcxx/include/__algorithm/make_projected.h index 22cceb4c..4b54c50 100644 --- a/libcxx/include/__algorithm/make_projected.h +++ b/libcxx/include/__algorithm/make_projected.h @@ -9,16 +9,13 @@ #ifndef _LIBCPP___ALGORITHM_MAKE_PROJECTED_H #define _LIBCPP___ALGORITHM_MAKE_PROJECTED_H -#include <__concepts/same_as.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> #include <__type_traits/invoke.h> #include <__type_traits/is_member_pointer.h> -#include <__type_traits/is_same.h> #include <__utility/declval.h> #include <__utility/forward.h> diff --git a/libcxx/include/__algorithm/merge.h b/libcxx/include/__algorithm/merge.h index bad663c..ae859b7 100644 --- a/libcxx/include/__algorithm/merge.h +++ b/libcxx/include/__algorithm/merge.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 556bd42..a683679 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -27,7 +27,6 @@ #include <__type_traits/is_integral.h> #include <__utility/move.h> #include <__utility/pair.h> -#include <__utility/unreachable.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference index 22637d4..9fa24c98 100644 --- a/libcxx/include/__bit_reference +++ b/libcxx/include/__bit_reference @@ -11,20 +11,18 @@ #define _LIBCPP___BIT_REFERENCE #include <__algorithm/copy_n.h> -#include <__algorithm/fill_n.h> #include <__algorithm/min.h> #include <__bit/countr.h> -#include <__bit/invert_if.h> -#include <__bit/popcount.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> #include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/is_constant_evaluated.h> #include <__utility/swap.h> -#include <cstring> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table index fe7465a..9c821ea 100644 --- a/libcxx/include/__hash_table +++ b/libcxx/include/__hash_table @@ -16,6 +16,7 @@ #include <__bit/countl.h> #include <__config> #include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__functional/hash.h> #include <__iterator/iterator_traits.h> #include <__math/rounding_functions.h> @@ -28,14 +29,12 @@ #include <__memory/unique_ptr.h> #include <__new/launder.h> #include <__type_traits/can_extract_key.h> -#include <__type_traits/conditional.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_const.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> @@ -45,7 +44,6 @@ #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> -#include <cstring> #include <limits> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer index 5ee70220..9d7fd3e 100644 --- a/libcxx/include/__split_buffer +++ b/libcxx/include/__split_buffer @@ -14,7 +14,6 @@ #include <__algorithm/move.h> #include <__algorithm/move_backward.h> #include <__config> -#include <__cstddef/size_t.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/move_iterator.h> @@ -24,7 +23,6 @@ #include <__memory/compressed_pair.h> #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> -#include <__type_traits/add_lvalue_reference.h> #include <__type_traits/conditional.h> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> diff --git a/libcxx/include/__tree b/libcxx/include/__tree index f6ef21c..dfb205c 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -23,14 +23,12 @@ #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> #include <__type_traits/can_extract_key.h> -#include <__type_traits/conditional.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_const.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_pointer.h> #include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/remove_const_ref.h> diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ef96b74..883e24d 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -323,21 +323,6 @@ struct Frame { << " Column: " << Column << "\n" << " Inline: " << IsInlineFrame << "\n"; } - - // Return a hash value based on the contents of the frame. Here we use a - // cryptographic hash function to minimize the chance of hash collisions. We - // do persist FrameIds as part of memprof formats up to Version 2, inclusive. - // However, the deserializer never calls this function; it uses FrameIds - // merely as keys to look up Frames proper. - inline FrameId hash() const { - llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> - HashBuilder; - HashBuilder.add(Function, LineOffset, Column, IsInlineFrame); - llvm::BLAKE3Result<8> Hash = HashBuilder.final(); - FrameId Id; - std::memcpy(&Id, Hash.data(), sizeof(Hash)); - return Id; - } }; // A type representing the index into the table of call stacks. @@ -775,9 +760,6 @@ public: } }; -// Compute a CallStackId for a given call stack. -CallStackId hashCallStack(ArrayRef<FrameId> CS); - namespace detail { // "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have // to do so in one of two different ways depending on the type of the hash @@ -1011,7 +993,7 @@ struct IndexedMemProfData { llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks; FrameId addFrame(const Frame &F) { - const FrameId Id = F.hash(); + const FrameId Id = hashFrame(F); Frames.try_emplace(Id, F); return Id; } @@ -1027,6 +1009,25 @@ struct IndexedMemProfData { CallStacks.try_emplace(CSId, std::move(CS)); return CSId; } + +private: + // Return a hash value based on the contents of the frame. Here we use a + // cryptographic hash function to minimize the chance of hash collisions. We + // do persist FrameIds as part of memprof formats up to Version 2, inclusive. + // However, the deserializer never calls this function; it uses FrameIds + // merely as keys to look up Frames proper. + FrameId hashFrame(const Frame &F) const { + llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> + HashBuilder; + HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame); + llvm::BLAKE3Result<8> Hash = HashBuilder.final(); + FrameId Id; + std::memcpy(&Id, Hash.data(), sizeof(Hash)); + return Id; + } + + // Compute a CallStackId for a given call stack. + CallStackId hashCallStack(ArrayRef<FrameId> CS) const; }; struct FrameStat { diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 119e091..d514485 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -1412,7 +1412,7 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const { } // Capture branch regions specific to the function (excluding expansions). for (const auto &CR : Function.CountedBranchRegions) - if (FileIDs.test(CR.FileID) && (CR.FileID == CR.ExpandedFileID)) + if (FileIDs.test(CR.FileID)) FileCoverage.BranchRegions.push_back(CR); // Capture MCDC records specific to the function. for (const auto &MR : Function.MCDCRecords) diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 1c240c3..15ab449 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -290,7 +290,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) { return Result; } -CallStackId hashCallStack(ArrayRef<FrameId> CS) { +CallStackId IndexedMemProfData::hashCallStack(ArrayRef<FrameId> CS) const { llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little> HashBuilder; for (FrameId F : CS) diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc index c7b4c7a..aedcd9a 100644 --- a/llvm/lib/Support/Unix/Threading.inc +++ b/llvm/lib/Support/Unix/Threading.inc @@ -141,7 +141,7 @@ uint64_t llvm::get_threadid() { #elif defined(__ANDROID__) return uint64_t(gettid()); #elif defined(__linux__) - return uint64_t(syscall(SYS_gettid)); + return uint64_t(syscall(__NR_gettid)); #else return uint64_t(pthread_self()); #endif diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index df0320f..f97ea40 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9292,6 +9292,7 @@ static bool isRenamedInGFX9(int Opcode) { GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32) // case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64: + case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: case AMDGPU::V_INTERP_P2_F16: case AMDGPU::V_MAD_F16_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index b1f93a4..532df39 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3708,12 +3708,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>; def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>; def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; -def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; -def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>; def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>; -def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; -def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +} + +let True16Predicate = UseFakeTrue16Insts in { +def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>; +def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>; } let OtherPredicates = [isGFX9Plus] in { diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 34f90b3..df50113 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -339,8 +339,7 @@ let FPDPRounding = 1 in { } // End Predicates = [Has16BitInsts, isGFX8Only] let SubtargetPredicate = isGFX9Plus in { - defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9", - VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>; + defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>; defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>; } // End SubtargetPredicate = isGFX9Plus } // End FPDPRounding = 1 @@ -643,8 +642,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>; defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; -defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; -defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>; +defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; +defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>; @@ -1375,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR let SubtargetPredicate = isGFX11Plus in { defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>; - defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; - defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>; + defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>; + defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>; defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>; defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>; @@ -1720,7 +1719,7 @@ defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_ defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">; defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">; defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">; -defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">; +defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">; defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>; defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>; defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>; @@ -1731,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>; defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>; defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>; defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>; -defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>; -defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>; +defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">; +defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">; defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>; defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>; defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>; @@ -1755,8 +1754,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30 defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">; defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">; defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">; -defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >; -defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >; +defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">; +defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">; defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">; defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">; defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 689f3b3..1afd687 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", let OtherPredicates = [HasDot7Insts] in { defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>; defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>; } // End OtherPredicates = [HasDot7Insts] let OtherPredicates = [HasDot1Insts] in { defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>; defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", - VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>; + VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>; } // End OtherPredicates = [HasDot1Insts] def DOT2_BF16_Profile @@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile, multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> { let IsDOT = 1 in - defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, + defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, null_frag, 1>; // Dot-iu instructions consider input as signed if imod neg bits are set. Thus // Dot-iu Intrinsics have extra operands and require separate codegen pattern. diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 0f568ba..930ed9a 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1454,12 +1454,13 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> { bit IsMAI = MAI; } -def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; -def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; -def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; -def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; -def VOP3_MAI : VOP3Features<0, 0, 0, 1>; -def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>; +def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>; +def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>; +def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>; +def VOP3_PACKED : VOP3Features<1, 1, 1, 0>; +def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>; +def VOP3_MAI : VOP3Features<0, 0, 0, 1>; +def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>; // Packed is misleading, but it enables the appropriate op_sel // modifiers. @@ -1908,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME, string pseudo_mnemonic = "", bit isSingle = 0> { - defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>; - defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>; + defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>; + defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>; } multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName, diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp index 1783e4a..2ab2daa 100644 --- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp +++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp @@ -40,7 +40,7 @@ static bool findAndReplaceVectors(Module &M); class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> { public: DataScalarizerVisitor() : GlobalMap() {} - bool visit(Function &F); + bool visit(Instruction &I); // InstVisitor methods. They return true if the instruction was scalarized, // false if nothing changed. bool visitInstruction(Instruction &I) { return false; } @@ -65,28 +65,11 @@ public: private: GlobalVariable *lookupReplacementGlobal(Value *CurrOperand); DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap; - SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs; - bool finish(); }; -bool DataScalarizerVisitor::visit(Function &F) { +bool DataScalarizerVisitor::visit(Instruction &I) { assert(!GlobalMap.empty()); - ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock()); - for (BasicBlock *BB : RPOT) { - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { - Instruction *I = &*II; - bool Done = InstVisitor::visit(I); - ++II; - if (Done && I->getType()->isVoidTy()) - I->eraseFromParent(); - } - } - return finish(); -} - -bool DataScalarizerVisitor::finish() { - RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs); - return true; + return InstVisitor::visit(I); } GlobalVariable * @@ -104,6 +87,20 @@ bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) { unsigned NumOperands = LI.getNumOperands(); for (unsigned I = 0; I < NumOperands; ++I) { Value *CurrOpperand = LI.getOperand(I); + ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand); + if (CE && CE->getOpcode() == Instruction::GetElementPtr) { + GetElementPtrInst *OldGEP = + cast<GetElementPtrInst>(CE->getAsInstruction()); + OldGEP->insertBefore(&LI); + IRBuilder<> Builder(&LI); + LoadInst *NewLoad = + Builder.CreateLoad(LI.getType(), OldGEP, LI.getName()); + NewLoad->setAlignment(LI.getAlign()); + LI.replaceAllUsesWith(NewLoad); + LI.eraseFromParent(); + visitGetElementPtrInst(*OldGEP); + return true; + } if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) LI.setOperand(I, NewGlobal); } @@ -114,32 +111,48 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) { unsigned NumOperands = SI.getNumOperands(); for (unsigned I = 0; I < NumOperands; ++I) { Value *CurrOpperand = SI.getOperand(I); - if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) { - SI.setOperand(I, NewGlobal); + ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand); + if (CE && CE->getOpcode() == Instruction::GetElementPtr) { + GetElementPtrInst *OldGEP = + cast<GetElementPtrInst>(CE->getAsInstruction()); + OldGEP->insertBefore(&SI); + IRBuilder<> Builder(&SI); + StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP); + NewStore->setAlignment(SI.getAlign()); + SI.replaceAllUsesWith(NewStore); + SI.eraseFromParent(); + visitGetElementPtrInst(*OldGEP); + return true; } + if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) + SI.setOperand(I, NewGlobal); } return false; } bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) { + unsigned NumOperands = GEPI.getNumOperands(); + GlobalVariable *NewGlobal = nullptr; for (unsigned I = 0; I < NumOperands; ++I) { Value *CurrOpperand = GEPI.getOperand(I); - GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand); - if (!NewGlobal) - continue; - IRBuilder<> Builder(&GEPI); - - SmallVector<Value *, MaxVecSize> Indices; - for (auto &Index : GEPI.indices()) - Indices.push_back(Index); - - Value *NewGEP = - Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices); - - GEPI.replaceAllUsesWith(NewGEP); - PotentiallyDeadInstrs.emplace_back(&GEPI); + NewGlobal = lookupReplacementGlobal(CurrOpperand); + if (NewGlobal) + break; } + if (!NewGlobal) + return false; + + IRBuilder<> Builder(&GEPI); + SmallVector<Value *, MaxVecSize> Indices; + for (auto &Index : GEPI.indices()) + Indices.push_back(Index); + + Value *NewGEP = + Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices, + GEPI.getName(), GEPI.getNoWrapFlags()); + GEPI.replaceAllUsesWith(NewGEP); + GEPI.eraseFromParent(); return true; } @@ -245,17 +258,13 @@ static bool findAndReplaceVectors(Module &M) { for (User *U : make_early_inc_range(G.users())) { if (isa<ConstantExpr>(U) && isa<Operator>(U)) { ConstantExpr *CE = cast<ConstantExpr>(U); - convertUsersOfConstantsToInstructions(CE, - /*RestrictToFunc=*/nullptr, - /*RemoveDeadConstants=*/false, - /*IncludeSelf=*/true); - } - if (isa<Instruction>(U)) { - Instruction *Inst = cast<Instruction>(U); - Function *F = Inst->getFunction(); - if (F) - Impl.visit(*F); + for (User *UCE : make_early_inc_range(CE->users())) { + if (Instruction *Inst = dyn_cast<Instruction>(UCE)) + Impl.visit(*Inst); + } } + if (Instruction *Inst = dyn_cast<Instruction>(U)) + Impl.visit(*Inst); } } } diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp index 6077af9..53fc1c7 100644 --- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp +++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp @@ -162,11 +162,18 @@ bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) { Value *CurrOpperand = LI.getOperand(I); ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand); if (CE && CE->getOpcode() == Instruction::GetElementPtr) { - convertUsersOfConstantsToInstructions(CE, - /*RestrictToFunc=*/nullptr, - /*RemoveDeadConstants=*/false, - /*IncludeSelf=*/true); - return false; + GetElementPtrInst *OldGEP = + cast<GetElementPtrInst>(CE->getAsInstruction()); + OldGEP->insertBefore(&LI); + + IRBuilder<> Builder(&LI); + LoadInst *NewLoad = + Builder.CreateLoad(LI.getType(), OldGEP, LI.getName()); + NewLoad->setAlignment(LI.getAlign()); + LI.replaceAllUsesWith(NewLoad); + LI.eraseFromParent(); + visitGetElementPtrInst(*OldGEP); + return true; } } return false; @@ -178,11 +185,17 @@ bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) { Value *CurrOpperand = SI.getOperand(I); ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand); if (CE && CE->getOpcode() == Instruction::GetElementPtr) { - convertUsersOfConstantsToInstructions(CE, - /*RestrictToFunc=*/nullptr, - /*RemoveDeadConstants=*/false, - /*IncludeSelf=*/true); - return false; + GetElementPtrInst *OldGEP = + cast<GetElementPtrInst>(CE->getAsInstruction()); + OldGEP->insertBefore(&SI); + + IRBuilder<> Builder(&SI); + StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP); + NewStore->setAlignment(SI.getAlign()); + SI.replaceAllUsesWith(NewStore); + SI.eraseFromParent(); + visitGetElementPtrInst(*OldGEP); + return true; } } return false; @@ -315,10 +328,17 @@ bool DXILFlattenArraysVisitor::visit(Function &F) { static void collectElements(Constant *Init, SmallVectorImpl<Constant *> &Elements) { // Base case: If Init is not an array, add it directly to the vector. - if (!isa<ArrayType>(Init->getType())) { + auto *ArrayTy = dyn_cast<ArrayType>(Init->getType()); + if (!ArrayTy) { Elements.push_back(Init); return; } + unsigned ArrSize = ArrayTy->getNumElements(); + if (isa<ConstantAggregateZero>(Init)) { + for (unsigned I = 0; I < ArrSize; ++I) + Elements.push_back(Constant::getNullValue(ArrayTy->getElementType())); + return; + } // Recursive case: Process each element in the array. if (auto *ArrayConstant = dyn_cast<ConstantArray>(Init)) { diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index a898b6a..c283b90 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -10,8 +10,11 @@ #include "DXILConstants.h" #include "DXILIntrinsicExpansion.h" #include "DXILOpBuilder.h" +#include "DXILResourceAnalysis.h" +#include "DXILShaderFlags.h" #include "DirectX.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/DXILMetadataAnalysis.h" #include "llvm/Analysis/DXILResource.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -763,6 +766,8 @@ PreservedAnalyses DXILOpLowering::run(Module &M, ModuleAnalysisManager &MAM) { return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve<DXILResourceBindingAnalysis>(); + PA.preserve<DXILMetadataAnalysis>(); + PA.preserve<ShaderFlagsAnalysis>(); return PA; } @@ -785,6 +790,9 @@ public: AU.addRequired<DXILResourceTypeWrapperPass>(); AU.addRequired<DXILResourceBindingWrapperPass>(); AU.addPreserved<DXILResourceBindingWrapperPass>(); + AU.addPreserved<DXILResourceMDWrapper>(); + AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } }; char DXILOpLoweringLegacy::ID = 0; diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp index d6917dc..1e88963 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp @@ -14,16 +14,21 @@ #include "DXILShaderFlags.h" #include "DirectX.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/DXILResource.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace llvm::dxil; -static void updateFunctionFlags(ComputedShaderFlags &CSF, - const Instruction &I) { +static void updateFunctionFlags(ComputedShaderFlags &CSF, const Instruction &I, + DXILResourceTypeMap &DRTM) { if (!CSF.Doubles) CSF.Doubles = I.getType()->isDoubleTy(); @@ -44,17 +49,34 @@ static void updateFunctionFlags(ComputedShaderFlags &CSF, break; } } + + if (auto *II = dyn_cast<IntrinsicInst>(&I)) { + switch (II->getIntrinsicID()) { + default: + break; + case Intrinsic::dx_typedBufferLoad: { + dxil::ResourceTypeInfo &RTI = + DRTM[cast<TargetExtType>(II->getArgOperand(0)->getType())]; + if (RTI.isTyped()) + CSF.TypedUAVLoadAdditionalFormats |= RTI.getTyped().ElementCount > 1; + } + } + } } -void ModuleShaderFlags::initialize(const Module &M) { +void ModuleShaderFlags::initialize(const Module &M, DXILResourceTypeMap &DRTM) { + // Collect shader flags for each of the functions for (const auto &F : M.getFunctionList()) { - if (F.isDeclaration()) + if (F.isDeclaration()) { + assert(!F.getName().starts_with("dx.op.") && + "DXIL Shader Flag analysis should not be run post-lowering."); continue; + } ComputedShaderFlags CSF; for (const auto &BB : F) for (const auto &I : BB) - updateFunctionFlags(CSF, I); + updateFunctionFlags(CSF, I, DRTM); // Insert shader flag mask for function F FunctionFlags.push_back({&F, CSF}); // Update combined shader flags mask @@ -101,8 +123,11 @@ AnalysisKey ShaderFlagsAnalysis::Key; ModuleShaderFlags ShaderFlagsAnalysis::run(Module &M, ModuleAnalysisManager &AM) { + DXILResourceTypeMap &DRTM = AM.getResult<DXILResourceTypeAnalysis>(M); + ModuleShaderFlags MSFI; - MSFI.initialize(M); + MSFI.initialize(M, DRTM); + return MSFI; } @@ -129,11 +154,22 @@ PreservedAnalyses ShaderFlagsAnalysisPrinter::run(Module &M, // ShaderFlagsAnalysis and ShaderFlagsAnalysisPrinterPass bool ShaderFlagsAnalysisWrapper::runOnModule(Module &M) { - MSFI.initialize(M); + DXILResourceTypeMap &DRTM = + getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap(); + + MSFI.initialize(M, DRTM); return false; } +void ShaderFlagsAnalysisWrapper::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DXILResourceTypeWrapperPass>(); +} + char ShaderFlagsAnalysisWrapper::ID = 0; -INITIALIZE_PASS(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis", - "DXIL Shader Flag Analysis", true, true) +INITIALIZE_PASS_BEGIN(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis", + "DXIL Shader Flag Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass) +INITIALIZE_PASS_END(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis", + "DXIL Shader Flag Analysis", true, true) diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h index 2d60137..67ddab3 100644 --- a/llvm/lib/Target/DirectX/DXILShaderFlags.h +++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h @@ -26,6 +26,7 @@ namespace llvm { class Module; class GlobalVariable; +class DXILResourceTypeMap; namespace dxil { @@ -84,7 +85,7 @@ struct ComputedShaderFlags { }; struct ModuleShaderFlags { - void initialize(const Module &); + void initialize(const Module &, DXILResourceTypeMap &DRTM); const ComputedShaderFlags &getFunctionFlags(const Function *) const; const ComputedShaderFlags &getCombinedFlags() const { return CombinedSFMask; } @@ -135,9 +136,7 @@ public: bool runOnModule(Module &M) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesAll(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override; }; } // namespace dxil diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp index f175f16..5afe6b2 100644 --- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp +++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp @@ -393,6 +393,7 @@ public: AU.addPreserved<DXILResourceBindingWrapperPass>(); AU.addPreserved<DXILResourceMDWrapper>(); AU.addPreserved<DXILMetadataAnalysisWrapperPass>(); + AU.addPreserved<ShaderFlagsAnalysisWrapper>(); } bool runOnModule(Module &M) override { diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index d4e35fb..ecb1bf7 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -99,8 +99,8 @@ public: ScalarizerPassOptions DxilScalarOptions; DxilScalarOptions.ScalarizeLoadStore = true; addPass(createScalarizerPass(DxilScalarOptions)); - addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILTranslateMetadataLegacyPass()); + addPass(createDXILOpLoweringLegacyPass()); addPass(createDXILPrepareModulePass()); } }; diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index a6be246..cd4b9f5 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones( if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1) break; + // If the caller was not successfully matched to a call in the IR/summary, + // there is no point in trying to clone for it as we can't update that call. + if (!CallerEdge->Caller->hasCall()) { + ++EI; + continue; + } + // Only need to process the ids along this edge pertaining to the given // allocation. auto CallerEdgeContextsForAlloc = diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp index 497fe4b..db89999 100644 --- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp @@ -171,6 +171,11 @@ static cl::opt<std::string> cl::desc("The default memprof options"), cl::Hidden, cl::init("")); +static cl::opt<bool> + SalvageStaleProfile("memprof-salvage-stale-profile", + cl::desc("Salvage stale MemProf profile"), + cl::init(false), cl::Hidden); + extern cl::opt<bool> MemProfReportHintedSizes; static cl::opt<unsigned> MinMatchedColdBytePercent( @@ -911,10 +916,38 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader, return UndriftMaps; } +// Given a MemProfRecord, undrift all the source locations present in the +// record in place. +static void +undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps, + memprof::MemProfRecord &MemProfRec) { + // Undrift a call stack in place. + auto UndriftCallStack = [&](std::vector<Frame> &CallStack) { + for (auto &F : CallStack) { + auto I = UndriftMaps.find(F.Function); + if (I == UndriftMaps.end()) + continue; + auto J = I->second.find(LineLocation(F.LineOffset, F.Column)); + if (J == I->second.end()) + continue; + auto &NewLoc = J->second; + F.LineOffset = NewLoc.LineOffset; + F.Column = NewLoc.Column; + } + }; + + for (auto &AS : MemProfRec.AllocSites) + UndriftCallStack(AS.CallStack); + + for (auto &CS : MemProfRec.CallSites) + UndriftCallStack(CS); +} + static void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, const TargetLibraryInfo &TLI, - std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) { + std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo, + DenseMap<uint64_t, LocToLocMap> &UndriftMaps) { auto &Ctx = M.getContext(); // Previously we used getIRPGOFuncName() here. If F is local linkage, // getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But @@ -962,6 +995,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader, NumOfMemProfFunc++; + // If requested, undrfit MemProfRecord so that the source locations in it + // match those in the IR. + if (SalvageStaleProfile) + undriftMemProfRecord(UndriftMaps, *MemProfRec); + // Detect if there are non-zero column numbers in the profile. If not, // treat all column numbers as 0 when matching (i.e. ignore any non-zero // columns in the IR). The profiled binary might have been built with @@ -1195,6 +1233,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); + TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin()); + DenseMap<uint64_t, LocToLocMap> UndriftMaps; + if (SalvageStaleProfile) + UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI); + // Map from the stack has of each allocation context in the function profiles // to the total profiled size (bytes), allocation type, and whether we matched // it to an allocation in the IR. @@ -1205,7 +1248,8 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) { continue; const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F); - readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo); + readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo, + UndriftMaps); } if (ClPrintMemProfMatchInfo) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d967813..8804989 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -21148,8 +21148,11 @@ bool SLPVectorizerPass::vectorizeCmpInsts(iterator_range<ItT> CmpInsts, if (R.isDeleted(I)) continue; for (Value *Op : I->operands()) - if (auto *RootOp = dyn_cast<Instruction>(Op)) + if (auto *RootOp = dyn_cast<Instruction>(Op)) { Changed |= vectorizeRootInstruction(nullptr, RootOp, BB, R); + if (R.isDeleted(I)) + break; + } } // Try to vectorize operands as vector bundles. for (CmpInst *I : CmpInsts) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index ca8614b..71f43ab 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -821,6 +821,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, } #endif +VPlan::VPlan(Loop *L) { + setEntry(VPIRBasicBlock::fromBasicBlock(L->getLoopPreheader())); + ScalarHeader = VPIRBasicBlock::fromBasicBlock(L->getHeader()); +} + VPlan::~VPlan() { if (Entry) { VPValue DummyValue; @@ -847,19 +852,17 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop) { - VPIRBasicBlock *Entry = - VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); - VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + auto Plan = std::make_unique<VPlan>(TheLoop); + VPBlockBase *ScalarHeader = Plan->getScalarHeader(); + // Connect entry only to vector preheader initially. Entry will also be // connected to the scalar preheader later, during skeleton creation when // runtime guards are added as needed. Note that when executing the VPlan for // an epilogue vector loop, the original entry block here will be replaced by // a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after // generating code for the main vector loop. - VPBlockUtils::connectBlocks(Entry, VecPreheader); - VPIRBasicBlock *ScalarHeader = - VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); - auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader); + VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); + VPBlockUtils::connectBlocks(Plan->getEntry(), VecPreheader); // Create SCEV and VPValue for the trip count. // We use the symbolic max backedge-taken-count, which works also when diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index d2699588..8dd94a2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3813,13 +3813,6 @@ class VPlan { DenseMap<const SCEV *, VPValue *> SCEVToExpansion; public: - /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and - /// with \p ScalarHeader wrapping the original header of the scalar loop. - VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader) - : VPlan(Entry, ScalarHeader) { - TripCount = TC; - } - /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader /// wrapping the original header of the scalar loop. VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader) @@ -3829,6 +3822,18 @@ public: "scalar header must be a leaf node"); } + /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and + /// with \p ScalarHeader wrapping the original header of the scalar loop. + VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader) + : VPlan(Entry, ScalarHeader) { + TripCount = TC; + } + + /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the + /// original preheader and scalar header of \p L, to be used as entry and + /// scalar header blocks of the new VPlan. + VPlan(Loop *L); + ~VPlan(); void setEntry(VPBasicBlock *VPBB) { diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir index 8be7308..3feccff 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir @@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write body: | bb.0: $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec - $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write # GCN: V_MFMA @@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc body: | bb.0: $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec - $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: dot_write_vgpr_different_dot_write # GCN: V_DOT @@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write body: | bb.0: $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec - $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: dot_write_vgpr_different_valu_read # GCN: V_DOT diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir index d59bcfb..5289198 100644 --- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir +++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir @@ -1071,7 +1071,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write body: | bb.0: $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec - $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write # GCN: V_MFMA @@ -1265,7 +1265,7 @@ name: dot_write_vgpr_different_dot_read_srcc body: | bb.0: $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec - $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: dot_write_vgpr_different_dot_write # GCN: V_DOT @@ -1275,7 +1275,7 @@ name: dot_write_vgpr_different_dot_write body: | bb.0: $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec - $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec ... # GCN-LABEL: name: dot_write_vgpr_different_valu_read # GCN: V_DOT diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll index 234d4e0..c9969c9 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandle.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s +; RUN: opt -S -passes=dxil-translate-metadata,dxil-op-lower %s | FileCheck %s ; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s ; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll index aa143df..425084e 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s +; RUN: opt -S -passes=dxil-translate-metadata,dxil-op-lower %s | FileCheck %s ; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s ; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll new file mode 100644 index 0000000..b694739 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll @@ -0,0 +1,44 @@ +; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s +; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=CHECK-OBJ + +target triple = "dxil-pc-shadermodel6.7-library" + +; CHECK-OBJ: - Name: SFI0 +; CHECK-OBJ: Flags: +; CHECK-OBJ: TypedUAVLoadAdditionalFormats: true + +; CHECK: Combined Shader Flags for Module +; CHECK-NEXT: Shader Flags Value: 0x00002000 + +; CHECK: Note: shader requires additional functionality: +; CHECK: Typed UAV Load Additional Formats + +; CHECK: Function multicomponent : 0x00002000 +define <4 x float> @multicomponent() #0 { + %res = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false) + %val = call <4 x float> @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %res, i32 0) + ret <4 x float> %val +} + +; CHECK: Function onecomponent : 0x00000000 +define float @onecomponent() #0 { + %res = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false) + %val = call float @llvm.dx.typedBufferLoad( + target("dx.TypedBuffer", float, 1, 0, 0) %res, i32 0) + ret float %val +} + +; CHECK: Function noload : 0x00000000 +define void @noload(<4 x float> %val) #0 { + %res = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false) + call void @llvm.dx.typedBufferStore( + target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %res, i32 0, + <4 x float> %val) + ret void +} + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} diff --git a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll new file mode 100644 index 0000000..3ae5832 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='dxil-flatten-arrays,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + + +@ZerroInitArr = internal constant [2 x [3 x float]] [[3 x float] zeroinitializer, [3 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00]], align 16 + + +define internal void @main() { +; CHECK-LABEL: define internal void @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 1 +; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 2 +; CHECK-NEXT: [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16 +; CHECK-NEXT: ret void +; +entry: + %0 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 1 + %.i0 = load float, ptr %0, align 16 + %1 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 2 + %.i03 = load float, ptr %1, align 16 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll index b3f1609..b071557 100644 --- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll +++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \ -; RUN: grep -v "Verify generated machine code" | FileCheck %s +; RUN: llc -filetype=asm -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s --check-prefixes=CHECK,CHECK-ASM +; RUN: llc -filetype=obj -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s --check-prefixes=CHECK,CHECK-OBJ ; REQUIRES: asserts @@ -7,6 +7,11 @@ ; CHECK-NEXT: Target Library Information ; CHECK-NEXT: DXIL Resource Type Analysis ; CHECK-NEXT: Target Transform Information + +; CHECK-OBJ-NEXT: Machine Module Information +; CHECK-OBJ-NEXT: Machine Branch Probability Analysis +; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata + ; CHECK-NEXT: ModulePass Manager ; CHECK-NEXT: DXIL Finalize Linkage ; CHECK-NEXT: DXIL Intrinsic Expansion @@ -17,11 +22,19 @@ ; CHECK-NEXT: Dominator Tree Construction ; CHECK-NEXT: Scalarize vector operations ; CHECK-NEXT: DXIL Resource Binding Analysis -; CHECK-NEXT: DXIL Op Lowering ; CHECK-NEXT: DXIL resource Information ; CHECK-NEXT: DXIL Shader Flag Analysis ; CHECK-NEXT: DXIL Module Metadata analysis ; CHECK-NEXT: DXIL Translate Metadata +; CHECK-NEXT: DXIL Op Lowering ; CHECK-NEXT: DXIL Prepare Module -; CHECK-NEXT: DXIL Metadata Pretty Printer -; CHECK-NEXT: Print Module IR + +; CHECK-ASM-NEXT: DXIL Metadata Pretty Printer +; CHECK-ASM-NEXT: Print Module IR + +; CHECK-OBJ-NEXT: DXIL Embedder +; CHECK-OBJ-NEXT: DXContainer Global Emitter +; CHECK-OBJ-NEXT: FunctionPass Manager +; CHECK-OBJ-NEXT: Lazy Machine Block Frequency Analysis +; CHECK-OBJ-NEXT: Machine Optimization Remark Emitter +; CHECK-OBJ-NEXT: DXIL Assembly Printer diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll index 5972520..4e522c6 100644 --- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll +++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll @@ -21,7 +21,6 @@ ; CHECK-NOT: @groushared2dArrayofVectors ; CHECK-NOT: @groushared2dArrayofVectors.scalarized - define <4 x i32> @load_array_vec_test() #0 { ; CHECK-LABEL: define <4 x i32> @load_array_vec_test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -33,18 +32,13 @@ define <4 x i32> @load_array_vec_test() #0 { ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3) ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3) ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1 +; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2 +; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3 +; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4 ; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]] ; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]] @@ -87,7 +81,7 @@ define <4 x i32> @load_vec_test() #0 { define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { ; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test( ; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]] +; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr @@ -121,18 +115,13 @@ define <4 x i32> @multid_load_test() #0 { ; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3) ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3) -; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3) ; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1 +; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2 +; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3) -; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3 +; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3) ; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4 ; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]] ; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]] diff --git a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll new file mode 100644 index 0000000..25dc2c3 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll @@ -0,0 +1,25 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s + + +@StaticArr = internal constant [8 x <3 x float>] [<3 x float> zeroinitializer, <3 x float> splat (float 5.000000e-01), <3 x float> <float 1.000000e+00, float 5.000000e-01, float 5.000000e-01>, <3 x float> <float 5.000000e-01, float 1.000000e+00, float 5.000000e-01>, <3 x float> <float 5.000000e-01, float 5.000000e-01, float 1.000000e+00>, <3 x float> <float 5.000000e-01, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 5.000000e-01, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 5.000000e-01>], align 16 + +; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind +define internal void @main() #1 { +; CHECK-LABEL: define internal void @main() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), align 16 +; CHECK-NEXT: [[DOTI1:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 1), align 4 +; CHECK-NEXT: [[DOTI2:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 2), align 8 +; CHECK-NEXT: [[DOTI01:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), align 16 +; CHECK-NEXT: [[DOTI12:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 1), align 4 +; CHECK-NEXT: [[DOTI23:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 2), align 8 +; CHECK-NEXT: ret void +; +entry: + %arrayidx = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 1 + %2 = load <3 x float>, ptr %arrayidx, align 16 + %arrayidx2 = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 2 + %3 = load <3 x float>, ptr %arrayidx2, align 16 + ret void +} diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll index a32db8b..ed1e910 100644 --- a/llvm/test/CodeGen/DirectX/scalar-load.ll +++ b/llvm/test/CodeGen/DirectX/scalar-load.ll @@ -2,10 +2,10 @@ ; Make sure we can load groupshared, static vectors and arrays of vectors -@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16 -@"vecData" = external addrspace(3) global <4 x i32>, align 4 +@arrayofVecData = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16 +@vecData = external addrspace(3) global <4 x i32>, align 4 @staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4 -@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16 +@groushared2dArrayofVectors = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16 ; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16 ; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4 @@ -19,12 +19,12 @@ ; CHECK-LABEL: load_array_vec_test -define <4 x i32> @load_array_vec_test() #0 { - ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4 - %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 - %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4 - %3 = add <4 x i32> %1, %2 - ret <4 x i32> %3 +define <3 x float> @load_array_vec_test() #0 { + ; CHECK-COUNT-6: load float, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4 + %1 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 + %2 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4 + %3 = fadd <3 x float> %1, %2 + ret <3 x float> %3 } ; CHECK-LABEL: load_vec_test @@ -36,8 +36,14 @@ define <4 x i32> @load_vec_test() #0 { ; CHECK-LABEL: load_static_array_of_vec_test define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { - ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index - ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4 + ; CHECK: getelementptr inbounds [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index + ; CHECK: load i32, ptr {{.*}}, align 4 + ; CHECK: getelementptr i32, ptr {{.*}}, i32 1 + ; CHECK: load i32, ptr {{.*}}, align 4 + ; CHECK: getelementptr i32, ptr {{.*}}, i32 2 + ; CHECK: load i32, ptr {{.*}}, align 4 + ; CHECK: getelementptr i32, ptr {{.*}}, i32 3 + ; CHECK: load i32, ptr {{.*}}, align 4 %3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index %4 = load <4 x i32>, <4 x i32>* %3, align 4 ret <4 x i32> %4 diff --git a/llvm/test/MC/AMDGPU/dl-insts.s b/llvm/test/MC/AMDGPU/dl-insts.s index 00e9bec..599734a 100644 --- a/llvm/test/MC/AMDGPU/dl-insts.s +++ b/llvm/test/MC/AMDGPU/dl-insts.s @@ -536,198 +536,6 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] // CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c] v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] -// CHECK: encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] -// CHECK: encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] -// CHECK: encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] -// CHECK: encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] -// CHECK: encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] -// CHECK: encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] -// CHECK: encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] -// CHECK: encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] -// CHECK: encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] -// CHECK: encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] -// CHECK: encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] -// CHECK: encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] -// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] -// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] -// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] -// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c] -v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] // // Test clamp. diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s index 51498d3..87a0987 100644 --- a/llvm/test/MC/AMDGPU/gfx1030_err.s +++ b/llvm/test/MC/AMDGPU/gfx1030_err.s @@ -211,3 +211,365 @@ image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12 // missing dim image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf glc // GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: missing dim operand + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index d46f010..5aa19e5 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -1268,11 +1268,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi // GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_i16_f16 v5, s1, s2 // GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1304,7 +1304,7 @@ v_cvt_pk_norm_i16_f16 v5, null, exec_lo v_cvt_pk_norm_i16_f16 v5, -1, exec_hi // GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 // GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40] v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] @@ -1313,11 +1313,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l +// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h +// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| +// GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x12,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| +// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_u16_f16 v5, s1, s2 // GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1349,7 +1361,7 @@ v_cvt_pk_norm_u16_f16 v5, null, exec_lo v_cvt_pk_norm_u16_f16 v5, -1, exec_hi // GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0] +v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 // GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40] v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] @@ -1358,6 +1370,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l +// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h +// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| +// GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x13,0xd7,0xfd,0xd4,0x00,0x20] + +v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| +// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] + v_cvt_pk_u16_f32 v5, v1, v2 // GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -1583,53 +1607,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo| v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| // GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_div_fixup_f16 v5, v1, v2, s3 -// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +v_div_fixup_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f16 v5.l, v255.l, s2, s105 +// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f16 v5.l, s105, s105, exec_lo +// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -v_div_fixup_f16 v5, v255, s2, s105 -// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, s1, v255, exec_hi -// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -v_div_fixup_f16 v5, s105, s105, exec_lo -// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +v_div_fixup_f16 v5.l, m0, 0.5, m0 +// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, m0, 0.5, m0 -// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 +// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] -v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +v_div_fixup_f16 v5.l, v255.h, s2, s105 +// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] -v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +v_div_fixup_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] -v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 -// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] + +v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] + +v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 +// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43] + +v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 +// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23] + +v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 +// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] v_div_fixup_f32 v5, v1, v2, s3 // GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] @@ -3674,50 +3722,62 @@ v_max_u16 v5.l, v255.l, v255.h v_max_u16 v255.h, 0xfe0b, vcc_hi // GFX11: v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_maxmin_f16 v5, v1, v2, s3 -// GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +v_maxmin_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] + +v_maxmin_f16 v5.l, v255.l, s2, s105 +// GFX11: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] + +v_maxmin_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] -v_maxmin_f16 v5, v255, s2, s105 -// GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +v_maxmin_f16 v5.l, s105, s105, exec_lo +// GFX11: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] -v_maxmin_f16 v5, s1, v255, exec_hi -// GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] -v_maxmin_f16 v5, s105, s105, exec_lo -// GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_maxmin_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] -v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v5.l, m0, 0.5, m0 +// GFX11: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] -v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] -v_maxmin_f16 v5, m0, 0.5, m0 -// GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] -v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] -v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 +// GFX11: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] -v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] -v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +v_maxmin_f16 v5.l, v255.h, s2, s105 +// GFX11: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01] -v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 -// GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +v_maxmin_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01] + +v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] v_maxmin_f32 v5, v1, v2, s3 // GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] @@ -4751,50 +4811,62 @@ v_min_u16 v5.l, v255.l, v255.h v_min_u16 v255.h, 0xfe0b, vcc_hi // GFX11: v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v5, v1, v2, s3 -// GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +v_minmax_f16 v5.l, v1.l, v2.l, s3 +// GFX11: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] + +v_minmax_f16 v5.l, v255.l, s2, s105 +// GFX11: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] + +v_minmax_f16 v5.l, s1, v255.l, exec_hi +// GFX11: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] + +v_minmax_f16 v5.l, s105, s105, exec_lo +// GFX11: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] + +v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX11: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] -v_minmax_f16 v5, v255, s2, s105 -// GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v5, s1, v255, exec_hi -// GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX11: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] -v_minmax_f16 v5, s105, s105, exec_lo -// GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +v_minmax_f16 v5.l, m0, 0.5, m0 +// GFX11: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] -v_minmax_f16 v5, vcc_lo, ttmp15, v3 -// GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX11: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] -v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| +// GFX11: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] -v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| +// GFX11: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v5, m0, 0.5, m0 -// GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| +// GFX11: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] -v_minmax_f16 v5, |exec_lo|, -1, vcc_hi -// GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 +// GFX11: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] -v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| -// GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 +// GFX11: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] -v_minmax_f16 v5, null, exec_lo, -|0xfe0b| -// GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| -// GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +v_minmax_f16 v5.l, v255.h, s2, s105 +// GFX11: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01] -v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 -// GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +v_minmax_f16 v5.l, s1, v255.h, exec_hi +// GFX11: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01] -v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 -// GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 -// GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2 +// GFX11: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] v_minmax_f32 v5, v1, v2, s3 // GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index 36d959f..62d6c8e 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -1124,89 +1124,101 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1418,47 +1430,83 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] + +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2576,47 +2624,92 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_max_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] -v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] -v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] + +v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] + +v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -3620,47 +3713,92 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_min_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] -v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] + +v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] + +v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] + +v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX11: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4757,32 +4895,33 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ v_xor_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 479bd19e..75bbdbc 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -628,29 +628,41 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -733,41 +745,74 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] +// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x54,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x54,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1582,41 +1627,80 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_max_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] -v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x60,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x60,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2356,41 +2440,80 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_min_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] -v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] -v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x61,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x61,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX11: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3057,32 +3180,33 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1 v_xor_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: [0xff,0x40,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s new file mode 100644 index 0000000..451627e --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s @@ -0,0 +1,219 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index 62bebd0..0309b2e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -1286,11 +1286,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi // GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] -v_cvt_pk_norm_i16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_i16_f16 v5, v255, v255 -// GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_i16_f16 v5, s1, s2 // GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1331,11 +1331,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] -v_cvt_pk_norm_u16_f16 v5, v1, v2 -// GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] -v_cvt_pk_norm_u16_f16 v5, v255, v255 -// GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +v_cvt_pknorm_i16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pknorm_i16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] v_cvt_pk_norm_u16_f16 v5, s1, s2 // GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1376,6 +1388,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0] v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] // GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pknorm_u16_f16 v5, v1.h, v2.l +// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +v_cvt_pknorm_u16_f16 v5, v255.l, v255.h +// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + v_cvt_pk_u16_f32 v5, v1, v2 // GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -1601,50 +1625,62 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo| v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| // GFX12: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] -v_div_fixup_f16 v5, v1, v2, s3 -// GFX12: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +v_div_fixup_f16 v5.l, v1.l, v2.l, s3 +// GFX12: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] + +v_div_fixup_f16 v5.l, v255.l, s2, s105 +// GFX12: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] + +v_div_fixup_f16 v5.l, s1, v255.l, exec_hi +// GFX12: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] + +v_div_fixup_f16 v5.l, s105, s105, exec_lo +// GFX12: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] + +v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l +// GFX12: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] -v_div_fixup_f16 v5, v255, s2, s105 -// GFX12: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l +// GFX12: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, s1, v255, exec_hi -// GFX12: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| +// GFX12: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] -v_div_fixup_f16 v5, s105, s105, exec_lo -// GFX12: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +v_div_fixup_f16 v5.l, m0, 0.5, m0 +// GFX12: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] -v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 -// GFX12: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi +// GFX12: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] -v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 -// GFX12: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] +// GFX12: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] -v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| -// GFX12: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] +// GFX12: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, m0, 0.5, m0 -// GFX12: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] +// GFX12: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] -v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi -// GFX12: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] +// GFX12: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] -v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] -// GFX12: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] +// GFX12: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] -v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0] -// GFX12: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp +// GFX12: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] -// GFX12: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +v_div_fixup_f16 v5.l, v255.h, s2, s105 +// GFX12: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] -v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] -// GFX12: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +v_div_fixup_f16 v5.l, s1, v255.h, exec_hi +// GFX12: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] -v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] -// GFX12: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h +// GFX12: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] -v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp -// GFX12: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp +// GFX12: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] v_div_fixup_f32 v5, v1, v2, s3 // GFX12: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index e7b12ec..b769324 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1442,6 +1442,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1484,6 +1490,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] @@ -1700,53 +1712,68 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0 v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -5203,20 +5230,20 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf // GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index b879e59..f76dd26 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -840,6 +840,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -852,6 +858,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -939,47 +951,62 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x54,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 -// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -3472,20 +3499,20 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0 v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 // GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] -v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] -v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s index a7d42c6..5ed2091 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s @@ -131,3 +131,221 @@ v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[1,0,0] v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,1,0] // GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. diff --git a/llvm/test/MC/AMDGPU/gfx908_err.s b/llvm/test/MC/AMDGPU/gfx908_err.s new file mode 100644 index 0000000..d39e9b5 --- /dev/null +++ b/llvm/test/MC/AMDGPU/gfx908_err.s @@ -0,0 +1,436 @@ +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefix=GFX908 --implicit-check-not=error: %s + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s index 7b2acc6..a05c7fb 100644 --- a/llvm/test/MC/AMDGPU/gfx90a_err.s +++ b/llvm/test/MC/AMDGPU/gfx90a_err.s @@ -239,3 +239,438 @@ scratch_load_lds_dword v2, off ds_read_b32 v0, v1 gds // GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: gds modifier is not supported on this GPU + +// op_sel not allowed in dot opcodes with 4- or 8-bit packed data + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + +v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] +// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. + diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 857da7c..9790473 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -1375,10 +1375,16 @@ # GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 # GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1419,11 +1425,41 @@ 0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 # GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1464,6 +1500,30 @@ 0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 # GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -1690,53 +1750,125 @@ # GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] 0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 -# GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] 0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] # CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] 0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b] + +0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] @@ -4157,49 +4289,118 @@ # W64-FAKE16: v_max_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-REAL16: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-FAKE16: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-REAL16: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-FAKE16: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b] 0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +# W32-REAL16: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +# W32-FAKE16: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +# W64-REAL16: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] +# W64-FAKE16: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33] 0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 -# GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_maxmin_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_maxmin_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00] @@ -5647,49 +5848,118 @@ # W64-FAKE16: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00 -# GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01 -# GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01 -# GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01 -# GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01 -# GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b -# GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-REAL16: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +# W32-FAKE16: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-REAL16: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] +# W64-FAKE16: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b] 0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33 -# GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +# W32-REAL16: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +# W32-FAKE16: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +# W64-REAL16: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] +# W64-FAKE16: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33] 0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 -# GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_minmax_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_minmax_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] + +0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00 # GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 93992cb..4dc4f46 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -2113,46 +2113,118 @@ # W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] 0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] 0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -2833,46 +2905,118 @@ # W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] 0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] 0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] @@ -4113,130 +4257,322 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] 0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index a339d55..dcf57ee 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -1141,40 +1141,106 @@ # W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] 0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 -# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] 0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05 +# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 +# W32-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -1585,40 +1651,106 @@ # W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05] 0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05 -# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05] 0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 -# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] + +0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05 +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05] + +0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05 +# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00 +# W32-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00] 0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] @@ -2349,70 +2481,202 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] 0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index d31e96af4..4c2060a 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -1393,10 +1393,16 @@ # GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf] 0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00 # GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00] @@ -1437,11 +1443,29 @@ 0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00 -# GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00] 0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00 -# GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00] 0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00 # GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00] @@ -1482,6 +1506,18 @@ 0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00 # GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00] +0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00] + +0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00] + 0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00 # GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00] @@ -1708,49 +1744,118 @@ # GFX12: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf] 0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00 -# GFX12: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W32-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00] 0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01 -# GFX12: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01] 0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01 -# GFX12: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01] 0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01 -# GFX12: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01] 0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 -# GFX12: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1 -# GFX12: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W32-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] +# W64-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1] 0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01 -# GFX12: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01] 0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01 -# GFX12: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W32-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01] 0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1 -# GFX12: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W32-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] +# W64-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1] 0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00 -# GFX12: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00] 0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3 -# GFX12: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W32-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] +# W64-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3] 0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43 -# GFX12: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] +# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43] 0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23 -# GFX12: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W32-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W32-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W64-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] +# W64-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23] + +0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] + +0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01 +# W32-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01] + +0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01 +# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] +# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01] + +0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00 +# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00] 0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00 -# GFX12: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00] 0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00 # GFX12: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 53c1f2e..c64fe39 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4452,133 +4452,316 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] 0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] 0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] 0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] 0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index a77b90c..9ed20c7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -2622,73 +2622,196 @@ # W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] + +0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] + +0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00 -# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05] 0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05] 0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05] 0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05] 0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05] 0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05] 0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05 -# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05] + +0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] + +0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] + +0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + +0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05] + +0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05 +# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05] 0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00 -# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] +# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt index aa3b4c7..310e23f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt @@ -778,291 +778,15 @@ # CHECK: v_dot4_i32_i8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c] 0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c - # CHECK: v_dot4_u32_u8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c] 0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c - # CHECK: v_dot8_i32_i4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c] 0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c - # CHECK: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c] 0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04] -0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14] -0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14 - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c] -0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c - -# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c] -0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c - # CHECK: v_dot2_f32_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0xc0,0xa3,0xd3,0x01,0x05,0x0e,0x1c] 0x00,0xc0,0xa3,0xd3,0x01,0x05,0x0e,0x1c diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll new file mode 100644 index 0000000..e7fbb3c --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll @@ -0,0 +1,70 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining, where one callsite required for +;; cloning is missing (e.g. unmatched). +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(200); +;; delete[] y; +;; return 0; +;; } + +; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-report-hinted-sizes \ +; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \ +; RUN: --check-prefix=SIZESUNHINTED +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "\"memprof\"=\"cold\"" + +source_filename = "memprof-missing-callsite.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + ;; Missing callsite metadata blocks cloning + %call = call ptr @_Z3foov() + %call1 = call ptr @_Z3foov() + ret i32 0 +} + +define internal ptr @_Z3foov() #0 { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { noinline optnone } + +!2 = !{!3, !5} +!3 = !{!4, !"notcold", !10} +!4 = !{i64 9086428284934609951, i64 8632435727821051414} +!5 = !{!6, !"cold", !11, !12} +!6 = !{i64 9086428284934609951, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!10 = !{i64 123, i64 100} +!11 = !{i64 456, i64 200} +!12 = !{i64 789, i64 300} + +; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning +; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning +; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll index 49c22bf..a6863cb 100644 --- a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll +++ b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll @@ -20,7 +20,9 @@ ; RUN: -stats -debug -save-temps \ ; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG -; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR +;; We should not see any type of cold attribute or cloning applied +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \ +; RUN: --implicit-check-not ".memprof." ;; Try again but with distributed ThinLTO ; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ @@ -39,26 +41,23 @@ ; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG ;; Run ThinLTO backend +;; We should not see any type of cold attribute or cloning applied ; RUN: opt -passes=memprof-context-disambiguation \ ; RUN: -memprof-import-summary=%t.o.thinlto.bc \ -; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR +; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \ +; RUN: --implicit-check-not ".memprof." ; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains) ; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains -;; Check that all calls in the IR are to the original functions, leading to a -;; non-cold operator new call. - source_filename = "tailcall-nonunique.cc" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: noinline -; IR-LABEL: @_Z3barv() define dso_local ptr @_Z3barv() local_unnamed_addr #0 { entry: - ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]] %call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9 ret ptr %call } @@ -67,54 +66,43 @@ entry: declare ptr @_Znam(i64) #1 ; Function Attrs: noinline -; IR-LABEL: @_Z5blah1v() define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z3barv() %call = tail call ptr @_Z3barv() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z5blah2v() define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z3barv() %call = tail call ptr @_Z3barv() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z4baz1v() define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z5blah1v() %call = tail call ptr @_Z5blah1v() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z4baz2v() define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 { entry: - ; IR: call ptr @_Z5blah2v() %call = tail call ptr @_Z5blah2v() ret ptr %call } ; Function Attrs: noinline -; IR-LABEL: @_Z3foob(i1 %b) define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 { entry: br i1 %b, label %if.then, label %if.else if.then: ; preds = %entry - ; IR: call ptr @_Z4baz1v() %call = tail call ptr @_Z4baz1v() br label %return if.else: ; preds = %entry - ; IR: call ptr @_Z4baz2v() %call1 = tail call ptr @_Z4baz2v() br label %return @@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then } ; Function Attrs: noinline -; IR-LABEL: @xyz() define dso_local i32 @xyz() local_unnamed_addr #0 { delete.end13: - ; IR: call ptr @_Z3foob(i1 true) %call = tail call ptr @_Z3foob(i1 true) - ; IR: call ptr @_Z3foob(i1 true) %call1 = tail call ptr @_Z3foob(i1 true) - ; IR: call ptr @_Z3foob(i1 false) %call2 = tail call ptr @_Z3foob(i1 false) - ; IR: call ptr @_Z3foob(i1 false) %call3 = tail call ptr @_Z3foob(i1 false) ret i32 0 } define dso_local i32 @main() local_unnamed_addr #0 { delete.end13: - ; IR: call i32 @xyz() %call1 = tail call i32 @xyz(), !callsite !11 ret i32 0 } -; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" } - attributes #0 = { noinline } attributes #1 = { nobuiltin allocsize(0) } attributes #2 = { builtin allocsize(0) } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll index 75cebae..bcb6ebc 100644 --- a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll +++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll @@ -7,14 +7,8 @@ ; RUN: -memprof-verify-ccg -memprof-verify-nodes \ ; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s -;; Make sure we created some clones -; CHECK: created clone A.memprof.1 -; CHECK: created clone C.memprof.1 -; CHECK: created clone D.memprof.1 -; CHECK: created clone E.memprof.1 -; CHECK: created clone B.memprof.1 -; CHECK: created clone F.memprof.1 -; CHECK: created clone G.memprof.1 +;; Make sure we successfully created at least one clone +; CHECK: created clone {{.*}}.memprof.1 ; ModuleID = '<stdin>' source_filename = "reduced.ll" diff --git a/llvm/test/Transforms/PGOProfile/memprof-undrift.test b/llvm/test/Transforms/PGOProfile/memprof-undrift.test new file mode 100644 index 0000000..45fb2f2 --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/memprof-undrift.test @@ -0,0 +1,174 @@ +; REQUIRES: x86_64-linux + +; Make sure that we can undrift the MemProf profile and annotate the IR +; accordingly. +; +; The IR was generated from: +; +; char *foo() { return ::new char[4]; } +; char *leaf() { return ::new char[4]; } +; char *middle() { return leaf(); } +; char *aaa() { return middle(); } +; char *bbb() { return middle(); } +; +; int main() { +; foo(); +; +; char *a = aaa(); +; char *b = bbb(); +; a[0] = 'a'; +; b[0] = 'b'; +; delete[] a; +; sleep(10); +; delete[] b; +; +; return 0; +; } + +; RUN: split-file %s %t +; RUN: llvm-profdata merge %t/memprof_undrift.yaml -o %t/memprof_undrift.memprofdata +; RUN: opt < %t/memprof_undrift.ll -passes='memprof-use<profile-filename=%t/memprof_undrift.memprofdata>' -memprof-salvage-stale-profile -memprof-ave-lifetime-cold-threshold=5 -S 2>&1 | FileCheck %s + +;--- memprof_undrift.yaml +--- +HeapProfileRecords: + - GUID: _Z3aaav + AllocSites: [] + CallSites: + - - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false } + - GUID: _Z6middlev + AllocSites: [] + CallSites: + - - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false } + - GUID: _Z3foov + AllocSites: + - Callstack: + - { Function: _Z3foov, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 4 + TotalLifetime: 10000 + TotalLifetimeAccessDensity: 0 + CallSites: [] + - GUID: _Z4leafv + AllocSites: + - Callstack: + - { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 4 + TotalLifetime: 0 + TotalLifetimeAccessDensity: 25000 + - Callstack: + - { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false } + - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false } + MemInfoBlock: + AllocCount: 1 + TotalSize: 4 + TotalLifetime: 10000 + TotalLifetimeAccessDensity: 2 + CallSites: [] + - GUID: _Z3bbbv + AllocSites: [] + CallSites: + - - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false } +... +;--- memprof_undrift.ll +define dso_local ptr @_Z3foov() !dbg !5 { +; CHECK-LABEL: @_Z3foov() +entry: + %call = call ptr @_Znam(i64 4) #1, !dbg !8 +; CHECK: call ptr @_Znam(i64 4) #[[ATTR:[0-9]+]] + ret ptr %call, !dbg !9 +} + +; Function Attrs: nobuiltin allocsize(0) +declare ptr @_Znam(i64 noundef) #0 + +define dso_local ptr @_Z4leafv() !dbg !10 { +; CHECK-LABEL: @_Z4leafv() +entry: + %call = call ptr @_Znam(i64 4) #1, !dbg !11 +; CHECK: call ptr @_Znam(i64 4) {{.*}}, !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]] + ret ptr %call, !dbg !12 +} + +define dso_local ptr @_Z6middlev() !dbg !13 { +; CHECK-LABEL: @_Z6middlev() +entry: + %call.i = call ptr @_Znam(i64 4) #1, !dbg !14 +; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C2:[0-9]+]] + ret ptr %call.i, !dbg !16 +} + +define dso_local ptr @_Z3aaav() !dbg !17 { +; CHECK-LABEL: @_Z3aaav() +entry: + %call.i.i = call ptr @_Znam(i64 4) #1, !dbg !18 +; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C3:[0-9]+]] + ret ptr %call.i.i, !dbg !21 +} + +define dso_local ptr @_Z3bbbv() !dbg !22 { +; CHECK-LABEL: @_Z3bbbv() +entry: + %call.i.i = call ptr @_Znam(i64 4) #1, !dbg !23 +; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C4:[0-9]+]] + ret ptr %call.i.i, !dbg !26 +} + +attributes #0 = { nobuiltin allocsize(0) } +attributes #1 = { builtin allocsize(0) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1) +!1 = !DIFile(filename: "undrift.cc", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{} +!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 56, type: !6, unit: !0) +!6 = !DISubroutineType(types: !7) +!7 = !{} +!8 = !DILocation(line: 56, column: 22, scope: !5) +!9 = !DILocation(line: 56, column: 15, scope: !5) +!10 = distinct !DISubprogram(name: "leaf", linkageName: "_Z4leafv", scope: !1, file: !1, line: 58, type: !6, unit: !0) +!11 = !DILocation(line: 58, column: 23, scope: !10) +!12 = !DILocation(line: 58, column: 16, scope: !10) +!13 = distinct !DISubprogram(name: "middle", linkageName: "_Z6middlev", scope: !1, file: !1, line: 59, type: !6, unit: !0) +!14 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !15) +!15 = distinct !DILocation(line: 59, column: 25, scope: !13) +!16 = !DILocation(line: 59, column: 18, scope: !13) +!17 = distinct !DISubprogram(name: "aaa", linkageName: "_Z3aaav", scope: !1, file: !1, line: 61, type: !6, unit: !0) +!18 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !19) +!19 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !20) +!20 = distinct !DILocation(line: 61, column: 22, scope: !17) +!21 = !DILocation(line: 61, column: 15, scope: !17) +!22 = distinct !DISubprogram(name: "bbb", linkageName: "_Z3bbbv", scope: !1, file: !1, line: 62, type: !6, unit: !0) +!23 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !24) +!24 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !25) +!25 = distinct !DILocation(line: 62, column: 22, scope: !22) +!26 = !DILocation(line: 62, column: 15, scope: !22) + +; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="cold" } + +; CHECK: ![[M1]] = !{![[M1L:[0-9]+]], ![[M1R:[0-9]+]]} +; CHECK: ![[M1L]] = !{![[M1LL:[0-9]+]], !"cold"} +; CHECK: ![[M1LL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 7749555980993309703} +; CHECK: ![[M1R]] = !{![[M1RL:[0-9]+]], !"notcold"} +; CHECK: ![[M1RL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 -4748707735015301746} + +; CHECK: ![[C1]] = !{i64 -7165227774426488445} + +; CHECK: ![[C2]] = !{i64 6179674587295384169} + +; CHECK: ![[C3]] = !{i64 -4748707735015301746} + +; CHECK: ![[C4]] = !{i64 7749555980993309703} diff --git a/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll b/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll new file mode 100644 index 0000000..d3995f1 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s + +define void @foo() { +; CHECK-LABEL: define void @foo() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], %[[BB3:.*]] ], [ zeroinitializer, %[[BB]] ] +; CHECK-NEXT: br label %[[BB3]] +; CHECK: [[BB3]]: +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[TMP0]] to <2 x i1> +; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i1> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i1> zeroinitializer, [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32 +; CHECK-NEXT: [[I22:%.*]] = or i32 [[TMP6]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[I22]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <2 x i32> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP11]] = select <2 x i1> [[TMP10]], <2 x i32> zeroinitializer, <2 x i32> zeroinitializer +; CHECK-NEXT: br label %[[BB1]] +; +bb: + br label %bb1 + +bb1: ; preds = %bb3, %bb + %i = phi i32 [ %i26, %bb3 ], [ 0, %bb ] + %i2 = phi i32 [ %i24, %bb3 ], [ 0, %bb ] + br label %bb3 + +bb3: ; preds = %bb1 + %i4 = zext i32 %i2 to i64 + %i5 = mul i64 %i4, 0 + %i10 = or i64 0, %i5 + %i11 = trunc i64 %i10 to i32 + %i12 = and i32 %i11, 0 + %i13 = zext i32 %i to i64 + %i14 = mul i64 %i13, 0 + %i19 = or i64 0, %i14 + %i20 = trunc i64 %i19 to i32 + %i21 = and i32 %i20, 0 + %i22 = or i32 %i12, %i21 + %i23 = icmp ult i32 %i22, 0 + %i24 = select i1 %i23, i32 0, i32 0 + %i25 = icmp ult i32 0, 0 + %i26 = select i1 %i25, i32 0, i32 0 + br label %bb1 +} diff --git a/llvm/test/tools/llvm-cov/branch-macros.cpp b/llvm/test/tools/llvm-cov/branch-macros.cpp index 73042ac..7f3d1e8 100644 --- a/llvm/test/tools/llvm-cov/branch-macros.cpp +++ b/llvm/test/tools/llvm-cov/branch-macros.cpp @@ -5,7 +5,7 @@ #define COND1 (a == b) #define COND2 (a != b) #define COND3 (COND1 && COND2) -#define COND4 (COND3 ? COND2 : COND1) +#define COND4 (COND3 ? COND2 : COND1) // CHECK: | Branch ([[@LINE]]:15): [True: 1, False: 2] #define MACRO1 COND3 #define MACRO2 MACRO1 #define MACRO3 MACRO2 diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s new file mode 100644 index 0000000..bdc02d4 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s @@ -0,0 +1,59 @@ +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOAND_D -mattr="+a" | FileCheck --check-prefix=AMOAND_D %s + +AMOAND_D: --- +AMOAND_D-NEXT: mode: latency +AMOAND_D-NEXT: key: +AMOAND_D-NEXT: instructions: +AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] X10 [[RE01:X[0-9]+]]' +AMOAND_D-NEXT: config: '' +AMOAND_D-NEXT: register_initial_values: +AMOAND_D-NEXT: - '[[RE01:X[0-9]+]]=0x0' +AMOAND_D-DAG: ... + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOADD_W -mattr="+a" | FileCheck --check-prefix=AMOADD_W %s + +AMOADD_W: --- +AMOADD_W-NEXT: mode: latency +AMOADD_W-NEXT: key: +AMOADD_W-NEXT: instructions: +AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] X10 [[RE02:X[0-9]+]]' +AMOADD_W-NEXT: config: '' +AMOADD_W-NEXT: register_initial_values: +AMOADD_W-NEXT: - '[[RE02:X[0-9]+]]=0x0' +AMOADD_W-DAG: ... + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOMAXU_D -mattr="+a" | FileCheck --check-prefix=AMOMAXU_D %s + +AMOMAXU_D: --- +AMOMAXU_D-NEXT: mode: latency +AMOMAXU_D-NEXT: key: +AMOMAXU_D-NEXT: instructions: +AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] X10 [[RE03:X[0-9]+]]' +AMOMAXU_D-NEXT: config: '' +AMOMAXU_D-NEXT: register_initial_values: +AMOMAXU_D-NEXT: - '[[RE03:X[0-9]+]]=0x0' +AMOMAXU_D-DAG: ... + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOMIN_W -mattr="+a" | FileCheck --check-prefix=AMOMIN_W %s + +AMOMIN_W: --- +AMOMIN_W-NEXT: mode: latency +AMOMIN_W-NEXT: key: +AMOMIN_W-NEXT: instructions: +AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] X10 [[RE04:X[0-9]+]]' +AMOMIN_W-NEXT: config: '' +AMOMIN_W-NEXT: register_initial_values: +AMOMIN_W-NEXT: - '[[RE04:X[0-9]+]]=0x0' +AMOMIN_W-DAG: ... + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOXOR_D -mattr="+a" | FileCheck --check-prefix=AMOXOR_D %s + +AMOXOR_D: --- +AMOXOR_D-NEXT: mode: latency +AMOXOR_D-NEXT: key: +AMOXOR_D-NEXT: instructions: +AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] X10 [[RE05:X[0-9]+]]' +AMOXOR_D-NEXT: config: '' +AMOXOR_D-NEXT: register_initial_values: +AMOXOR_D-NEXT: - '[[RE05:X[0-9]+]]=0x0' +AMOXOR_D-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s new file mode 100644 index 0000000..9e94f02 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s @@ -0,0 +1,48 @@ +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ADDI -mattr=+c | FileCheck --check-prefix=C_ADDI %s + +C_ADDI: --- +C_ADDI-NEXT: mode: latency +C_ADDI-NEXT: key: +C_ADDI-NEXT: instructions: +C_ADDI-NEXT: - 'C_ADDI [[REG01:X[0-9]+]] [[RE02:X[0-9]+]] [[IMM0:i_0x[0-9]+]]' + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ADDIW -mattr=+c | FileCheck --check-prefix=C_ADDIW %s + +C_ADDIW: --- +C_ADDIW-NEXT: mode: latency +C_ADDIW-NEXT: key: +C_ADDIW-NEXT: instructions: +C_ADDIW-NEXT: - 'C_ADDIW [[REG11:X[0-9]+]] [[RE12:X[0-9]+]] [[IMM1:i_0x[0-9]+]]' + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ANDI -mattr=+c | FileCheck --check-prefix=C_ANDI %s + +C_ANDI: --- +C_ANDI-NEXT: mode: latency +C_ANDI-NEXT: key: +C_ANDI-NEXT: instructions: +C_ANDI-NEXT: - 'C_ANDI [[REG31:X[0-9]+]] [[REG32:X[0-9]+]] [[IMM3:i_0x[0-9]+]]' + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SLLI -mattr=+c | FileCheck --check-prefix=C_SLLI %s + +C_SLLI: --- +C_SLLI-NEXT: mode: latency +C_SLLI-NEXT: key: +C_SLLI-NEXT: instructions: +C_SLLI-NEXT: - 'C_SLLI [[REG81:X[0-9]+]] [[REG82:X[0-9]+]] [[IMM8:i_0x[0-9]+]]' + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SRAI -mattr=+c | FileCheck --check-prefix=C_SRAI %s + +C_SRAI: --- +C_SRAI-NEXT: mode: latency +C_SRAI-NEXT: key: +C_SRAI-NEXT: instructions: +C_SRAI-NEXT: - 'C_SRAI [[REG91:X[0-9]+]] [[REG92:X[0-9]+]] [[IMM9:i_0x[0-9]+]]' + +# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SRLI -mattr=+c | FileCheck --check-prefix=C_SRLI %s + +C_SRLI: --- +C_SRLI-NEXT: mode: latency +C_SRLI-NEXT: key: +C_SRLI-NEXT: instructions: +C_SRLI-NEXT: - 'C_SRLI [[REG101:X[0-9]+]] [[REG102:X[0-9]+]] [[IMM10:i_0x[0-9]+]]' +C_SRLI-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s new file mode 100644 index 0000000..2dea89c --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s @@ -0,0 +1,11 @@ +# RUN: llvm-exegesis -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -mode=latency --benchmark-phase=assemble-measured-code -mattr=+d -opcode-name=FADD_D | FileCheck %s + +CHECK: --- +CHECK-NEXT: mode: latency +CHECK-NEXT: key: +CHECK-NEXT: instructions: +CHECK-NEXT: - 'FADD_D [[REG1:F[0-9]+_D]] [[REG2:F[0-9]+_D]] [[REG3:F[0-9]+_D]] i_0x7' +CHECK-NEXT: config: '' +CHECK-NEXT: register_initial_values: +CHECK-DAG: - '[[REG1]]=0x0' +CHECK-DAG: ... diff --git a/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg b/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg new file mode 100644 index 0000000..466ccc2 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg @@ -0,0 +1,3 @@ +if not ("RISCV" in config.root.targets): + # We need support for RISCV. + config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt index 414b49e..d95c37f 100644 --- a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt @@ -12,6 +12,9 @@ endif() if (LLVM_TARGETS_TO_BUILD MATCHES "Mips") list(APPEND LLVM_EXEGESIS_TARGETS "Mips") endif() +if(LLVM_TARGETS_TO_BUILD MATCHES "RISCV") + list(APPEND LLVM_EXEGESIS_TARGETS "RISCV") +endif() set(LLVM_EXEGESIS_TARGETS ${LLVM_EXEGESIS_TARGETS} PARENT_SCOPE) diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp index 9c926d1..c9225e5 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp +++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp @@ -95,11 +95,12 @@ Instruction::Instruction(const MCInstrDesc *Description, StringRef Name, const BitVector *ImplDefRegs, const BitVector *ImplUseRegs, const BitVector *AllDefRegs, - const BitVector *AllUseRegs) + const BitVector *AllUseRegs, + const BitVector *NonMemoryRegs) : Description(*Description), Name(Name), Operands(std::move(Operands)), Variables(std::move(Variables)), ImplDefRegs(*ImplDefRegs), ImplUseRegs(*ImplUseRegs), AllDefRegs(*AllDefRegs), - AllUseRegs(*AllUseRegs) {} + AllUseRegs(*AllUseRegs), NonMemoryRegs(*NonMemoryRegs) {} std::unique_ptr<Instruction> Instruction::create(const MCInstrInfo &InstrInfo, @@ -166,6 +167,8 @@ Instruction::create(const MCInstrInfo &InstrInfo, BitVector ImplUseRegs = RATC.emptyRegisters(); BitVector AllDefRegs = RATC.emptyRegisters(); BitVector AllUseRegs = RATC.emptyRegisters(); + BitVector NonMemoryRegs = RATC.emptyRegisters(); + for (const auto &Op : Operands) { if (Op.isReg()) { const auto &AliasingBits = Op.getRegisterAliasing().aliasedBits(); @@ -177,6 +180,8 @@ Instruction::create(const MCInstrInfo &InstrInfo, ImplDefRegs |= AliasingBits; if (Op.isUse() && Op.isImplicit()) ImplUseRegs |= AliasingBits; + if (Op.isUse() && !Op.isMemory()) + NonMemoryRegs |= AliasingBits; } } // Can't use make_unique because constructor is private. @@ -185,7 +190,8 @@ Instruction::create(const MCInstrInfo &InstrInfo, std::move(Variables), BVC.getUnique(std::move(ImplDefRegs)), BVC.getUnique(std::move(ImplUseRegs)), BVC.getUnique(std::move(AllDefRegs)), - BVC.getUnique(std::move(AllUseRegs)))); + BVC.getUnique(std::move(AllUseRegs)), + BVC.getUnique(std::move(NonMemoryRegs)))); } const Operand &Instruction::getPrimaryOperand(const Variable &Var) const { @@ -240,6 +246,12 @@ bool Instruction::hasAliasingRegisters( ForbiddenRegisters); } +bool Instruction::hasAliasingNotMemoryRegisters( + const BitVector &ForbiddenRegisters) const { + return anyCommonExcludingForbidden(AllDefRegs, NonMemoryRegs, + ForbiddenRegisters); +} + bool Instruction::hasOneUseOrOneDef() const { return AllDefRegs.count() || AllUseRegs.count(); } diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h index f8ebc07..d7712e2 100644 --- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h +++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h @@ -133,6 +133,12 @@ struct Instruction { // aliasing Use and Def registers. bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const; + // Whether this instruction is self aliasing through some registers. + // Repeating this instruction may execute sequentially by picking aliasing + // Def and Not Memory Use registers. It may also execute in parallel by + // picking non aliasing Def and Not Memory Use registers. + bool hasAliasingNotMemoryRegisters(const BitVector &ForbiddenRegisters) const; + // Whether this instruction's registers alias with OtherInstr's registers. bool hasAliasingRegistersThrough(const Instruction &OtherInstr, const BitVector &ForbiddenRegisters) const; @@ -160,12 +166,15 @@ struct Instruction { const BitVector &ImplUseRegs; // The set of aliased implicit use registers. const BitVector &AllDefRegs; // The set of all aliased def registers. const BitVector &AllUseRegs; // The set of all aliased use registers. + // The set of all aliased not memory use registers. + const BitVector &NonMemoryRegs; + private: Instruction(const MCInstrDesc *Description, StringRef Name, SmallVector<Operand, 8> Operands, SmallVector<Variable, 4> Variables, const BitVector *ImplDefRegs, const BitVector *ImplUseRegs, const BitVector *AllDefRegs, - const BitVector *AllUseRegs); + const BitVector *AllUseRegs, const BitVector *NonMemoryRegs); }; // Instructions are expensive to instantiate. This class provides a cache of diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt new file mode 100644 index 0000000..489ac6d --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt @@ -0,0 +1,22 @@ +include_directories( + ${LLVM_MAIN_SRC_DIR}/lib/Target/RISCV + ${LLVM_BINARY_DIR}/lib/Target/RISCV +) + +set(LLVM_LINK_COMPONENTS + CodeGen + RISCV + Exegesis + Core + Support + ) + +add_llvm_library(LLVMExegesisRISCV + DISABLE_LLVM_LINK_LLVM_DYLIB + STATIC + Target.cpp + + DEPENDS + intrinsics_gen + RISCVCommonTableGen + ) diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp new file mode 100644 index 0000000..41d3615 --- /dev/null +++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp @@ -0,0 +1,272 @@ +//===-- Target.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../Target.h" + +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "MCTargetDesc/RISCVMatInt.h" +#include "RISCVInstrInfo.h" + +// include computeAvailableFeatures and computeRequiredFeatures. +#define GET_AVAILABLE_OPCODE_CHECKER +#include "RISCVGenInstrInfo.inc" + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +#include <vector> + +namespace llvm { +namespace exegesis { + +namespace { + +// Stores constant value to a general-purpose (integer) register. +static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI, unsigned Reg, + const APInt &Value) { + SmallVector<MCInst, 8> MCInstSeq; + std::vector<MCInst> MatIntInstrs; + MCRegister DestReg = Reg; + + RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, DestReg, MCInstSeq); + MatIntInstrs.resize(MCInstSeq.size()); + std::copy(MCInstSeq.begin(), MCInstSeq.end(), MatIntInstrs.begin()); + + return MatIntInstrs; +} + +const unsigned ScratchIntReg = RISCV::X30; // t5 + +// Stores constant bits to a floating-point register. +static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI, + unsigned Reg, const APInt &Bits, + unsigned FmvOpcode) { + std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits); + Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg)); + return Instrs; +} + +// main idea is: +// we support APInt only if (represented as double) it has zero fractional +// part: 1.0, 2.0, 3.0, etc... then we can do the trick: write int to tmp reg t5 +// and then do FCVT this is only reliable thing in 32-bit mode, otherwise we +// need to use __floatsidf +static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI, + unsigned Reg, const APInt &Bits) { + double D = Bits.bitsToDouble(); + double IPart; + double FPart = std::modf(D, &IPart); + + if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) { + errs() << "loadFP64RegBits32 is not implemented for doubles like " << D + << ", please remove fractional part\n"; + return {}; + } + + std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits); + Instrs.push_back( + MCInstBuilder(RISCV::FCVT_D_W).addReg(Reg).addReg(ScratchIntReg)); + return Instrs; +} + +static MCInst nop() { + // ADDI X0, X0, 0 + return MCInstBuilder(RISCV::ADDI) + .addReg(RISCV::X0) + .addReg(RISCV::X0) + .addImm(0); +} + +static bool isVectorRegList(unsigned Reg) { + return RISCV::VRM2RegClass.contains(Reg) || + RISCV::VRM4RegClass.contains(Reg) || + RISCV::VRM8RegClass.contains(Reg) || + RISCV::VRN2M1RegClass.contains(Reg) || + RISCV::VRN2M2RegClass.contains(Reg) || + RISCV::VRN2M4RegClass.contains(Reg) || + RISCV::VRN3M1RegClass.contains(Reg) || + RISCV::VRN3M2RegClass.contains(Reg) || + RISCV::VRN4M1RegClass.contains(Reg) || + RISCV::VRN4M2RegClass.contains(Reg) || + RISCV::VRN5M1RegClass.contains(Reg) || + RISCV::VRN6M1RegClass.contains(Reg) || + RISCV::VRN7M1RegClass.contains(Reg) || + RISCV::VRN8M1RegClass.contains(Reg); +} + +class ExegesisRISCVTarget : public ExegesisTarget { +public: + ExegesisRISCVTarget(); + + bool matchesArch(Triple::ArchType Arch) const override; + + std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg, + const APInt &Value) const override; + + unsigned getDefaultLoopCounterRegister(const Triple &) const override; + + void decrementLoopCounterAndJump(MachineBasicBlock &MBB, + MachineBasicBlock &TargetMBB, + const MCInstrInfo &MII, + unsigned LoopRegister) const override; + + unsigned getScratchMemoryRegister(const Triple &TT) const override; + + void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg, + unsigned Offset) const override; + + ArrayRef<unsigned> getUnavailableRegisters() const override; + + Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var, + MCOperand &AssignedValue, + const BitVector &ForbiddenRegs) const override; + + std::vector<InstructionTemplate> + generateInstructionVariants(const Instruction &Instr, + unsigned MaxConfigsPerOpcode) const override; +}; + +ExegesisRISCVTarget::ExegesisRISCVTarget() + : ExegesisTarget(ArrayRef<CpuAndPfmCounters>{}, + RISCV_MC::isOpcodeAvailable) {} + +bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const { + return Arch == Triple::riscv32 || Arch == Triple::riscv64; +} + +std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI, + unsigned Reg, + const APInt &Value) const { + if (RISCV::GPRRegClass.contains(Reg)) + return loadIntReg(STI, Reg, Value); + if (RISCV::FPR16RegClass.contains(Reg)) + return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X); + if (RISCV::FPR32RegClass.contains(Reg)) + return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X); + if (RISCV::FPR64RegClass.contains(Reg)) { + if (STI.hasFeature(RISCV::Feature64Bit)) + return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X); + return loadFP64RegBits32(STI, Reg, Value); + } + if (Reg == RISCV::FRM || Reg == RISCV::VL || Reg == RISCV::VLENB || + Reg == RISCV::VTYPE || RISCV::GPRPairRegClass.contains(Reg) || + RISCV::VRRegClass.contains(Reg) || isVectorRegList(Reg)) { + // Don't initialize: + // - FRM + // - VL, VLENB, VTYPE + // - vector registers (and vector register lists) + // - Zfinx registers + // Generate 'NOP' so that exegesis treats such registers as initialized + // (it tries to initialize them with '0' anyway). + return {nop()}; + } + errs() << "setRegTo is not implemented for Reg " << Reg + << ", results will be unreliable\n"; + return {}; +} + +const unsigned DefaultLoopCounterReg = RISCV::X31; // t6 +const unsigned ScratchMemoryReg = RISCV::X10; // a0 + +unsigned +ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const { + return DefaultLoopCounterReg; +} + +void ExegesisRISCVTarget::decrementLoopCounterAndJump( + MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB, + const MCInstrInfo &MII, unsigned LoopRegister) const { + BuildMI(&MBB, DebugLoc(), MII.get(RISCV::ADDI)) + .addDef(LoopRegister) + .addUse(LoopRegister) + .addImm(-1); + BuildMI(&MBB, DebugLoc(), MII.get(RISCV::BNE)) + .addUse(LoopRegister) + .addUse(RISCV::X0) + .addMBB(&TargetMBB); +} + +unsigned ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const { + return ScratchMemoryReg; // a0 +} + +void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT, + unsigned Reg, + unsigned Offset) const { + // TODO: for now we ignore Offset because have no way + // to detect it in instruction. + auto &I = IT.getInstr(); + + auto MemOpIt = + find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); }); + assert(MemOpIt != I.Operands.end() && + "Instruction must have memory operands"); + + const Operand &MemOp = *MemOpIt; + + assert(MemOp.isReg() && "Memory operand expected to be register"); + + IT.getValueFor(MemOp) = MCOperand::createReg(Reg); +} + +const unsigned UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg, + ScratchIntReg, ScratchMemoryReg}; + +ArrayRef<unsigned> ExegesisRISCVTarget::getUnavailableRegisters() const { + return UnavailableRegisters; +} + +Error ExegesisRISCVTarget::randomizeTargetMCOperand( + const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue, + const BitVector &ForbiddenRegs) const { + uint8_t OperandType = + Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType; + + switch (OperandType) { + case RISCVOp::OPERAND_FRMARG: + AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN); + break; + case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO: + AssignedValue = MCOperand::createImm(0b1 << 4); + break; + case RISCVOp::OPERAND_SIMM6_NONZERO: + case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO: + AssignedValue = MCOperand::createImm(1); + break; + default: + if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM && + OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM) + AssignedValue = MCOperand::createImm(0); + } + return Error::success(); +} + +std::vector<InstructionTemplate> +ExegesisRISCVTarget::generateInstructionVariants( + const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const { + InstructionTemplate IT{&Instr}; + for (const Operand &Op : Instr.Operands) + if (Op.isMemory()) { + IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg); + } + return {IT}; +} + +} // anonymous namespace + +static ExegesisTarget *getTheRISCVExegesisTarget() { + static ExegesisRISCVTarget Target; + return &Target; +} + +void InitializeRISCVExegesisTarget() { + ExegesisTarget::registerTarget(getTheRISCVExegesisTarget()); +} + +} // namespace exegesis +} // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp index 7100b51..25cdf1c 100644 --- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp @@ -53,13 +53,6 @@ computeAliasingInstructions(const LLVMState &State, const Instruction *Instr, if (OtherOpcode == Instr->Description.getOpcode()) continue; const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode); - const MCInstrDesc &OtherInstrDesc = OtherInstr.Description; - // Ignore instructions that we cannot run. - if (OtherInstrDesc.isPseudo() || OtherInstrDesc.usesCustomInsertionHook() || - OtherInstrDesc.isBranch() || OtherInstrDesc.isIndirectBranch() || - OtherInstrDesc.isCall() || OtherInstrDesc.isReturn()) { - continue; - } if (OtherInstr.hasMemoryOperands()) continue; if (!ET.allowAsBackToBack(OtherInstr)) @@ -81,12 +74,10 @@ static ExecutionMode getExecutionModes(const Instruction &Instr, EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS; if (Instr.hasMemoryOperands()) EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR; - else { - if (Instr.hasAliasingRegisters(ForbiddenRegisters)) - EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; - if (Instr.hasOneUseOrOneDef()) - EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; - } + if (Instr.hasAliasingNotMemoryRegisters(ForbiddenRegisters)) + EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS; + if (Instr.hasOneUseOrOneDef()) + EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR; return EM; } diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp index 7dcff60..48357d4 100644 --- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp +++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp @@ -73,6 +73,9 @@ Error SnippetGenerator::generateConfigurations( for (CodeTemplate &CT : Templates) { // TODO: Generate as many BenchmarkCode as needed. { + CT.ScratchSpacePointerInReg = + State.getExegesisTarget().getScratchMemoryRegister( + State.getTargetMachine().getTargetTriple()); BenchmarkCode BC; BC.Info = CT.Info; BC.Key.Instructions.reserve(CT.Instructions.size()); @@ -108,6 +111,12 @@ std::vector<RegisterValue> SnippetGenerator::computeRegisterInitialValues( // Loop invariant: DefinedRegs[i] is true iif it has been set at least once // before the current instruction. BitVector DefinedRegs = State.getRATC().emptyRegisters(); + // If target always expects a scratch memory register as live input, + // mark it as defined. + const ExegesisTarget &Target = State.getExegesisTarget(); + unsigned ScratchMemoryReg = Target.getScratchMemoryRegister( + State.getTargetMachine().getTargetTriple()); + DefinedRegs.set(ScratchMemoryReg); std::vector<RegisterValue> RIV; for (const InstructionTemplate &IT : Instructions) { // Returns the register that this Operand sets or uses, or 0 if this is not @@ -200,7 +209,8 @@ static void setRegisterOperandValue(const RegisterOperandAssignment &ROV, if (ROV.Op->isExplicit()) { auto &AssignedValue = IB.getValueFor(*ROV.Op); if (AssignedValue.isValid()) { - assert(AssignedValue.isReg() && AssignedValue.getReg() == ROV.Reg); + // TODO don't re-assign register operands which are already "locked" + // by Target in corresponding InstructionTemplate return; } AssignedValue = MCOperand::createReg(ROV.Reg); diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 546ec770..fa37e05 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -274,6 +274,10 @@ static cl::opt<int> BenchmarkProcessCPU( cl::desc("The CPU number that the benchmarking process should executon on"), cl::cat(BenchmarkOptions), cl::init(-1)); +static cl::opt<std::string> MAttr( + "mattr", cl::desc("comma-separated list of target architecture features"), + cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init("")); + static ExitOnError ExitOnErr("llvm-exegesis error: "); // Helper function that logs the error(s) and exits. @@ -296,6 +300,18 @@ T ExitOnFileError(const Twine &FileName, Expected<T> &&E) { return std::move(*E); } +static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State, + unsigned Opcode) { + const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description; + if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook()) + return "Unsupported opcode: isPseudo/usesCustomInserter"; + if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) + return "Unsupported opcode: isBranch/isIndirectBranch"; + if (InstrDesc.isCall() || InstrDesc.isReturn()) + return "Unsupported opcode: isCall/isReturn"; + return nullptr; +} + // Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided, // and returns the opcode indices or {} if snippets should be read from // `SnippetsFile`. @@ -334,6 +350,7 @@ static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) { return I->getSecond(); return 0u; }; + SmallVector<StringRef, 2> Pieces; StringRef(OpcodeNames.getValue()) .split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false); @@ -352,17 +369,11 @@ static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) { static Expected<std::vector<BenchmarkCode>> generateSnippets(const LLVMState &State, unsigned Opcode, const BitVector &ForbiddenRegs) { - const Instruction &Instr = State.getIC().getInstr(Opcode); - const MCInstrDesc &InstrDesc = Instr.Description; // Ignore instructions that we cannot run. - if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook()) - return make_error<Failure>( - "Unsupported opcode: isPseudo/usesCustomInserter"); - if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch()) - return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch"); - if (InstrDesc.isCall() || InstrDesc.isReturn()) - return make_error<Failure>("Unsupported opcode: isCall/isReturn"); + if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode)) + return make_error<Failure>(Reason); + const Instruction &Instr = State.getIC().getInstr(Opcode); const std::vector<InstructionTemplate> InstructionVariants = State.getExegesisTarget().generateInstructionVariants( Instr, MaxConfigsPerOpcode); @@ -485,8 +496,8 @@ void benchmarkMain() { LLVMInitialize##TargetName##AsmParser(); #include "llvm/Config/TargetExegesis.def" - const LLVMState State = - ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters)); + const LLVMState State = ExitOnErr( + LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters)); // Preliminary check to ensure features needed for requested // benchmark mode are present on target CPU and/or OS. diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp index 55cbe83..847cca7 100644 --- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp @@ -40,13 +40,12 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) { VPBlockUtils::connectBlocks(VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - auto TC = std::make_unique<VPValue>(); LLVMContext C; auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB0); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -123,11 +122,10 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) { VPBlockUtils::connectBlocks(R2BB1, R2BB2); VPBlockUtils::connectBlocks(R1, R2); - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB0); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); VPDominatorTree VPDT; VPDT.recalculate(Plan); @@ -206,11 +204,10 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) { VPBasicBlock *VPBB2 = new VPBasicBlock("VPBB2"); VPBlockUtils::connectBlocks(R1, VPBB2); - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); VPDominatorTree VPDT; VPDT.recalculate(Plan); diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 97a1f81..6402d6f 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -257,11 +257,10 @@ TEST(VPBasicBlockTest, getPlan) { VPBlockUtils::connectBlocks(VPBB2, VPBB4); VPBlockUtils::connectBlocks(VPBB3, VPBB4); - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, VPBB2->getPlan()); @@ -280,11 +279,10 @@ TEST(VPBasicBlockTest, getPlan) { VPBasicBlock *VPBB1 = new VPBasicBlock(); VPBlockUtils::connectBlocks(VPBB1, R1); - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); @@ -313,11 +311,10 @@ TEST(VPBasicBlockTest, getPlan) { VPBlockUtils::connectBlocks(R1, VPBB2); VPBlockUtils::connectBlocks(R2, VPBB2); - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); EXPECT_EQ(&Plan, VPBB1->getPlan()); EXPECT_EQ(&Plan, R1->getPlan()); @@ -360,11 +357,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { EXPECT_EQ(VPBB2, FromIterator[1]); // Use Plan to properly clean up created blocks. - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); } { @@ -463,11 +459,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { EXPECT_EQ(R1, FromIterator[7]); // Use Plan to properly clean up created blocks. - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB0); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); } { @@ -549,11 +544,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { EXPECT_EQ(VPBB1, FromIterator[9]); // Use Plan to properly clean up created blocks. - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); } { @@ -600,11 +594,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { EXPECT_EQ(VPBB1, FromIterator[4]); // Use Plan to properly clean up created blocks. - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); } { @@ -695,11 +688,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) { EXPECT_EQ(VPBB1, FromIterator[3]); // Use Plan to properly clean up created blocks. - auto TC = std::make_unique<VPValue>(); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); } delete ScalarHeader; } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp index 570d202..6448153 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp @@ -24,7 +24,6 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPBB1->appendRecipe(UseI); VPBB1->appendRecipe(DefI); - auto TC = std::make_unique<VPValue>(); VPBasicBlock *VPBB2 = new VPBasicBlock(); VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); @@ -34,7 +33,7 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) { VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -66,13 +65,12 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) { VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - auto TC = std::make_unique<VPValue>(); LLVMContext C; auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -115,12 +113,11 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); - auto TC = std::make_unique<VPValue>(); auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -156,13 +153,12 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBlockUtils::connectBlocks(VPBB1, R1); - auto TC = std::make_unique<VPValue>(); LLVMContext C; auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -199,13 +195,12 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) { VPBlockUtils::connectBlocks(VPBB1, R1); VPBB3->setParent(R1); - auto TC = std::make_unique<VPValue>(); LLVMContext C; auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); #if GTEST_HAS_STREAM_REDIRECTION ::testing::internal::CaptureStderr(); @@ -233,13 +228,12 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) { VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1"); VPBlockUtils::connectBlocks(VPBB1, R1); - auto TC = std::make_unique<VPValue>(); LLVMContext C; auto *ScalarHeader = BasicBlock::Create(C, ""); VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader); VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB); VPBlockUtils::connectBlocks(VPPH, VPBB1); - VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB); + VPlan Plan(VPPH, ScalarHeaderVPBB); VPBB1->setParent(R1); #if GTEST_HAS_STREAM_REDIRECTION diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn index b528654..c0b1c45 100644 --- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn @@ -33,6 +33,7 @@ static_library("Common") { ] sources = [ "Args.cpp", + "BPSectionOrdererBase.cpp", "CommonLinkerContext.cpp", "DWARF.cpp", "DriverDispatcher.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn index 274f5b5..6552645 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn @@ -21,6 +21,7 @@ static_library("Passes") { ] sources = [ "CodeGenPassBuilder.cpp", + "DroppedVariableStats.cpp", "OptimizationLevel.cpp", "PassBuilder.cpp", "PassBuilderBindings.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index e4ca566..0f34231 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -28,7 +28,7 @@ unittest("IRTests") { "DemandedBitsTest.cpp", "DominatorTreeBatchUpdatesTest.cpp", "DominatorTreeTest.cpp", - "DroppedVariableStatsTest.cpp", + "DroppedVariableStatsIRTest.cpp", "FunctionTest.cpp", "IRBuilderTest.cpp", "InstructionsTest.cpp", diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 4866e31..983f7a2 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -459,7 +459,8 @@ public: /// Starting from `value`, follow the use-def chain in reverse, always /// selecting the aliasing OpOperands. Find and return Values for which /// `condition` evaluates to true. OpOperands of such matching Values are not - /// traversed any further. + /// traversed any further, the visited aliasing opOperands will be preserved + /// through `visitedOpOperands`. /// /// When reaching the end of a chain, also return the last Value of that /// chain if `config.alwaysIncludeLeaves` is set. @@ -484,7 +485,8 @@ public: /// `config`. SetVector<Value> findValueInReverseUseDefChain( Value value, llvm::function_ref<bool(Value)> condition, - TraversalConfig config = TraversalConfig()) const; + TraversalConfig config = TraversalConfig(), + llvm::DenseSet<OpOperand *> *visitedOpOperands = nullptr) const; /// Find the values that may define the contents of the given value at /// runtime. A block argument is always a definition. An OpResult is a diff --git a/mlir/include/mlir/IR/Constraints.td b/mlir/include/mlir/IR/Constraints.td index 13223aa..62bc84c 100644 --- a/mlir/include/mlir/IR/Constraints.td +++ b/mlir/include/mlir/IR/Constraints.td @@ -93,6 +93,12 @@ class Or<list<Pred> children> : CombinedPred<PredCombinerOr, children>; // A predicate that holds if its child does not. class Neg<Pred child> : CombinedPred<PredCombinerNot, [child]>; +// A predicate that is always true. +defvar TruePred = And<[]>; + +// A predicate that is always false. +defvar False = Or<[]>; + // A predicate that substitutes "pat" with "repl" in predicate calls of the // leaves of the predicate tree (i.e., not CombinedPred). // diff --git a/mlir/include/mlir/IR/Properties.td b/mlir/include/mlir/IR/Properties.td index 0becf7d..df630ad 100644 --- a/mlir/include/mlir/IR/Properties.td +++ b/mlir/include/mlir/IR/Properties.td @@ -13,6 +13,8 @@ #ifndef PROPERTIES #define PROPERTIES +include "mlir/IR/Constraints.td" + // Base class for defining properties. class Property<string storageTypeParam = "", string desc = ""> { // User-readable one line summary used in error reporting messages. If empty, @@ -63,6 +65,13 @@ class Property<string storageTypeParam = "", string desc = ""> { return convertFromAttribute($_storage, $_attr, $_diag); }]; + // The verification predicate for this property. Defaults to the true predicate, + // since properties are always their expected type. + // Within the predicate, `$_self` is an instance of the **interface** + // type of the property. Setting this field to ? will also result in a + // true predicate but is not recommended, as it breaks composability. + Pred predicate = TruePred; + // The call expression to hash the property. // // Format: @@ -150,8 +159,8 @@ class Property<string storageTypeParam = "", string desc = ""> { return ::mlir::failure(); }]; - // Base definition for the property. (Will be) used for `OptionalProperty` and - // such cases, analogously to `baseAttr`. + // Base definition for the property. Used to look through `OptionalProperty` + // for some format generation, as with the `baseAttr` field on attributes. Property baseProperty = ?; // Default value for the property within its storage. This should be an expression @@ -224,8 +233,7 @@ def I64Property : IntProperty<"int64_t">; class EnumProperty<string storageTypeParam, string desc = "", string default = ""> : Property<storageTypeParam, desc> { - // TODO: take advantage of EnumAttrInfo and the like to make this share nice - // parsing code with EnumAttr. + // TODO: implement predicate for enum validity. let writeToMlirBytecode = [{ $_writer.writeVarInt(static_cast<uint64_t>($_storage)); }]; @@ -331,6 +339,59 @@ def UnitProperty : Property<"bool", "unit property"> { } //===----------------------------------------------------------------------===// +// Property field overwrites + +/// Class for giving a property a default value. +/// This doesn't change anything about the property other than giving it a default +/// which can be used by ODS to elide printing. +class DefaultValuedProperty<Property p, string default = "", string storageDefault = ""> : Property<p.storageType, p.summary> { + let defaultValue = default; + let storageTypeValueOverride = storageDefault; + let baseProperty = p; + // Keep up to date with `Property` above. + let summary = p.summary; + let description = p.description; + let storageType = p.storageType; + let interfaceType = p.interfaceType; + let convertFromStorage = p.convertFromStorage; + let assignToStorage = p.assignToStorage; + let convertToAttribute = p.convertToAttribute; + let convertFromAttribute = p.convertFromAttribute; + let predicate = p.predicate; + let hashProperty = p.hashProperty; + let parser = p.parser; + let optionalParser = p.optionalParser; + let printer = p.printer; + let readFromMlirBytecode = p.readFromMlirBytecode; + let writeToMlirBytecode = p.writeToMlirBytecode; +} + +/// Apply the predicate `pred` to the property `p`, ANDing it with any +/// predicates it may already have. If `newSummary` is provided, replace the +/// summary of `p` with `newSummary`. +class ConfinedProperty<Property p, Pred pred, string newSummary = ""> + : Property<p.storageType, !if(!empty(newSummary), p.summary, newSummary)> { + let predicate = !if(!ne(p.predicate, TruePred), And<[p.predicate, pred]>, pred); + let baseProperty = p; + // Keep up to date with `Property` above. + let description = p.description; + let storageType = p.storageType; + let interfaceType = p.interfaceType; + let convertFromStorage = p.convertFromStorage; + let assignToStorage = p.assignToStorage; + let convertToAttribute = p.convertToAttribute; + let convertFromAttribute = p.convertFromAttribute; + let hashProperty = p.hashProperty; + let parser = p.parser; + let optionalParser = p.optionalParser; + let printer = p.printer; + let readFromMlirBytecode = p.readFromMlirBytecode; + let writeToMlirBytecode = p.writeToMlirBytecode; + let defaultValue = p.defaultValue; + let storageTypeValueOverride = p.storageTypeValueOverride; +} + +//===----------------------------------------------------------------------===// // Primitive property combinators /// Create a variable named `name` of `prop`'s storage type that is initialized @@ -342,14 +403,35 @@ class _makePropStorage<Property prop, string name> { true : "") # ";"; } +/// Construct a `Pred`icate `ret` that wraps the predicate of the underlying +/// property `childProp` with: +/// +/// [](childProp.storageType& s) { +/// return [](childProp.interfaceType i) { +/// return leafSubst(childProp.predicate, "$_self" to "i"); +/// }(childProp.convertFromStorage(s)) +/// } +/// +/// and then appends `prefix` and `suffix`. +class _makeStorageWrapperPred<Property wrappedProp> { + Pred ret = + Concat< + "[](" # "const " # wrappedProp.storageType + # "& baseStore) -> bool { return [](" + # wrappedProp.interfaceType # " baseIface) -> bool { return (", + SubstLeaves<"$_self", "baseIface", wrappedProp.predicate>, + "); }(" # !subst("$_storage", "baseStore", wrappedProp.convertFromStorage) + # "); }">; +} + /// The generic class for arrays of some other property, which is stored as a /// `SmallVector` of that property. This uses an `ArrayAttr` as its attribute form /// though subclasses can override this, as is the case with IntArrayAttr below. /// Those wishing to use a non-default number of SmallVector elements should /// subclass `ArrayProperty`. -class ArrayProperty<Property elem = Property<>, string desc = ""> : - Property<"::llvm::SmallVector<" # elem.storageType # ">", desc> { - let summary = "array of " # elem.summary; +class ArrayProperty<Property elem = Property<>, string newSummary = ""> : + Property<"::llvm::SmallVector<" # elem.storageType # ">", + !if(!empty(newSummary), "array of " # elem.summary, newSummary)> { let interfaceType = "::llvm::ArrayRef<" # elem.storageType # ">"; let convertFromStorage = "::llvm::ArrayRef<" # elem.storageType # ">{$_storage}"; let assignToStorage = "$_storage.assign($_value.begin(), $_value.end())"; @@ -382,6 +464,10 @@ class ArrayProperty<Property elem = Property<>, string desc = ""> : return ::mlir::ArrayAttr::get($_ctxt, elems); }]; + let predicate = !if(!eq(elem.predicate, TruePred), + TruePred, + Concat<"::llvm::all_of($_self, ", _makeStorageWrapperPred<elem>.ret, ")">); + defvar theParserBegin = [{ auto& storage = $_storage; auto parseElemFn = [&]() -> ::mlir::ParseResult { @@ -463,8 +549,8 @@ class ArrayProperty<Property elem = Property<>, string desc = ""> : }]); } -class IntArrayProperty<string storageTypeParam = "", string desc = ""> : - ArrayProperty<IntProperty<storageTypeParam, desc>> { +class IntArrayProperty<Property elem, string newSummary=""> : + ArrayProperty<elem, newSummary> { // Bring back the trivial conversions we don't get in the general case. let convertFromAttribute = [{ return convertFromAttribute($_storage, $_attr, $_diag); @@ -474,30 +560,6 @@ class IntArrayProperty<string storageTypeParam = "", string desc = ""> : }]; } -/// Class for giving a property a default value. -/// This doesn't change anything about the property other than giving it a default -/// which can be used by ODS to elide printing. -class DefaultValuedProperty<Property p, string default = "", string storageDefault = ""> : Property<p.storageType, p.summary> { - let defaultValue = default; - let storageTypeValueOverride = storageDefault; - let baseProperty = p; - // Keep up to date with `Property` above. - let summary = p.summary; - let description = p.description; - let storageType = p.storageType; - let interfaceType = p.interfaceType; - let convertFromStorage = p.convertFromStorage; - let assignToStorage = p.assignToStorage; - let convertToAttribute = p.convertToAttribute; - let convertFromAttribute = p.convertFromAttribute; - let hashProperty = p.hashProperty; - let parser = p.parser; - let optionalParser = p.optionalParser; - let printer = p.printer; - let readFromMlirBytecode = p.readFromMlirBytecode; - let writeToMlirBytecode = p.writeToMlirBytecode; -} - /// An optional property, stored as an std::optional<p.storageType> /// interfaced with as an std::optional<p.interfaceType>.. /// The syntax is `none` (or empty string if elided) for an absent value or @@ -575,6 +637,11 @@ class OptionalProperty<Property p, bit canDelegateParsing = 1> return ::mlir::ArrayAttr::get($_ctxt, {attr}); }]; + let predicate = !if(!ne(p.predicate, TruePred), + Or<[CPred<"!$_self.has_value()">, + SubstLeaves<"$_self", "(*($_self))", p.predicate>]>, + TruePred); + defvar delegatedParserBegin = [{ if (::mlir::succeeded($_parser.parseOptionalKeyword("none"))) { $_storage = std::nullopt; diff --git a/mlir/include/mlir/TableGen/Property.h b/mlir/include/mlir/TableGen/Property.h index 702e675..3861591 100644 --- a/mlir/include/mlir/TableGen/Property.h +++ b/mlir/include/mlir/TableGen/Property.h @@ -27,6 +27,7 @@ namespace mlir { namespace tblgen { class Dialect; class Type; +class Pred; // Wrapper class providing helper methods for accessing MLIR Property defined // in TableGen. This class should closely reflect what is defined as class @@ -74,6 +75,10 @@ public: return convertFromAttributeCall; } + // Return the property's predicate. Properties that didn't come from + // tablegen (the hardcoded ones) have the null predicate. + Pred getPredicate() const; + // Returns the method call which parses this property from textual MLIR. StringRef getParserCall() const { return parserCall; } diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp index 065739e..f8a7a22 100644 --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -483,10 +483,12 @@ bool AnalysisState::isValueRead(Value value) const { // Starting from `value`, follow the use-def chain in reverse, always selecting // the aliasing OpOperands. Find and return Values for which `condition` // evaluates to true. OpOperands of such matching Values are not traversed any -// further. +// further, the visited aliasing opOperands will be preserved through +// `visitedOpOperands`. llvm::SetVector<Value> AnalysisState::findValueInReverseUseDefChain( Value value, llvm::function_ref<bool(Value)> condition, - TraversalConfig config) const { + TraversalConfig config, + llvm::DenseSet<OpOperand *> *visitedOpOperands) const { llvm::DenseSet<Value> visited; llvm::SetVector<Value> result, workingSet; workingSet.insert(value); @@ -553,6 +555,8 @@ llvm::SetVector<Value> AnalysisState::findValueInReverseUseDefChain( } workingSet.insert(a.opOperand->get()); + if (visitedOpOperands) + visitedOpOperands->insert(a.opOperand); } } diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp index cb2efef..abc0635 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp @@ -48,27 +48,20 @@ neededValuesDominateInsertionPoint(const DominanceInfo &domInfo, return true; } -/// Return true if the given `insertionPoint` dominates all uses of -/// `emptyTensorOp`. -static bool insertionPointDominatesUses(const DominanceInfo &domInfo, - Operation *insertionPoint, - Operation *emptyTensorOp) { - return llvm::all_of(emptyTensorOp->getUsers(), [&](Operation *user) { - return domInfo.dominates(insertionPoint, user); - }); -} - -/// Find a valid insertion point for a replacement of `emptyTensorOp`, assuming -/// that the replacement may use any value from `neededValues`. +/// Find a valid insertion point for a replacement of `emptyTensorOp`'s +/// use of `user` operation, assuming that the replacement may use any +/// value from `neededValues`. static Operation * -findValidInsertionPoint(Operation *emptyTensorOp, +findValidInsertionPoint(Operation *emptyTensorOp, Operation *user, const SmallVector<Value> &neededValues) { DominanceInfo domInfo; + Operation *candidateInsertionPoint = emptyTensorOp; - // Gather all possible insertion points: the location of `emptyTensorOp` and - // right after the definition of each value in `neededValues`. + // Gather all possible insertion points: the location of + // `candidateInsertionPoint` and right after the definition of each value in + // `neededValues`. SmallVector<Operation *> insertionPointCandidates; - insertionPointCandidates.push_back(emptyTensorOp); + insertionPointCandidates.push_back(candidateInsertionPoint); for (Value val : neededValues) { // Note: The anchor op is using all of `neededValues`, so: // * in case of a block argument: There must be at least one op in the block @@ -90,8 +83,8 @@ findValidInsertionPoint(Operation *emptyTensorOp, if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint, neededValues)) continue; - // Check if the insertion point is before all uses. - if (!insertionPointDominatesUses(domInfo, insertionPoint, emptyTensorOp)) + // Check if the insertion point is before the use to be replaced. + if (!domInfo.dominates(insertionPoint, user)) continue; return insertionPoint; } @@ -103,8 +96,9 @@ findValidInsertionPoint(Operation *emptyTensorOp, LogicalResult mlir::bufferization::eliminateEmptyTensors( RewriterBase &rewriter, Operation *op, OneShotAnalysisState &state) { OpBuilder::InsertionGuard g(rewriter); - + llvm::DenseSet<OpOperand *> visitedOpOperands; op->walk([&](SubsetInsertionOpInterface op) { + visitedOpOperands.clear(); OpOperand &source = op.getSourceOperand(); // Skip operands that do not bufferize inplace. "tensor.empty" could still // be replaced, but the transformation may not be beneficial. @@ -131,16 +125,28 @@ LogicalResult mlir::bufferization::eliminateEmptyTensors( config.followSameTypeOrCastsOnly = true; SetVector<Value> emptyTensors = state.findValueInReverseUseDefChain( source.get(), /*condition=*/ - [&](Value val) { return val.getDefiningOp<tensor::EmptyOp>(); }, - config); + [&](Value val) { return val.getDefiningOp<tensor::EmptyOp>(); }, config, + &visitedOpOperands); for (Value v : emptyTensors) { Operation *emptyTensorOp = v.getDefiningOp(); + // Find the use to be replaced from the use-def chain. + auto iter = llvm::find_if( + visitedOpOperands, [&emptyTensorOp](OpOperand *opOperand) { + return llvm::count(emptyTensorOp->getUses(), *opOperand); + }); + // This could be achieved when a use of `emptyTensorOp` is being + // consumed by `SubsetInsertionOpInterface`'s source directly. + if (iter == visitedOpOperands.end()) + continue; + OpOperand *useToBeReplaced = *iter; + Operation *user = useToBeReplaced->getOwner(); + // Find a suitable insertion point. If no suitable insertion point for // the replacement can be found, skip this replacement. Operation *insertionPoint = - findValidInsertionPoint(emptyTensorOp, neededValues); + findValidInsertionPoint(emptyTensorOp, user, neededValues); if (!insertionPoint) continue; @@ -159,8 +165,10 @@ LogicalResult mlir::bufferization::eliminateEmptyTensors( replacement = rewriter.create<tensor::CastOp>(v.getLoc(), v.getType(), replacement); } - // Replace the tensor::EmptyOp. - rewriter.replaceOp(emptyTensorOp, replacement); + // Replace the specific use of the tensor::EmptyOp. + rewriter.modifyOpInPlace(user, [&]() { + user->setOperand(useToBeReplaced->getOperandNumber(), replacement); + }); state.resetCache(); } diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index ad70981..491b5f4 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -327,8 +327,8 @@ SmallVector<int64_t> vector::getAsIntegers(ArrayRef<OpFoldResult> foldResults) { SmallVector<int64_t> ints; llvm::transform( foldResults, std::back_inserter(ints), [](OpFoldResult foldResult) { - assert(foldResult.is<Attribute>() && "Unexpected non-constant index"); - return cast<IntegerAttr>(foldResult.get<Attribute>()).getInt(); + assert(isa<Attribute>(foldResult) && "Unexpected non-constant index"); + return cast<IntegerAttr>(cast<Attribute>(foldResult)).getInt(); }); return ints; } @@ -346,7 +346,7 @@ SmallVector<Value> vector::getAsValues(OpBuilder &builder, Location loc, loc, cast<IntegerAttr>(attr).getInt()) .getResult(); - return foldResult.get<Value>(); + return cast<Value>(foldResult); }); return values; } @@ -1353,8 +1353,8 @@ LogicalResult vector::ExtractOp::verify() { return emitOpError( "expected position attribute of rank no greater than vector rank"); for (auto [idx, pos] : llvm::enumerate(position)) { - if (pos.is<Attribute>()) { - int64_t constIdx = cast<IntegerAttr>(pos.get<Attribute>()).getInt(); + if (auto attr = dyn_cast<Attribute>(pos)) { + int64_t constIdx = cast<IntegerAttr>(attr).getInt(); if (constIdx < 0 || constIdx >= getSourceVectorType().getDimSize(idx)) { return emitOpError("expected position attribute #") << (idx + 1) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index bd5f06a..b0892d1 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -251,7 +251,7 @@ static SmallVector<int64_t> getReducedShape(ArrayRef<OpFoldResult> mixedSizes) { continue; } - auto value = cast<IntegerAttr>(size.get<Attribute>()).getValue(); + auto value = cast<IntegerAttr>(cast<Attribute>(size)).getValue(); if (value == 1) continue; reducedShape.push_back(value.getSExtValue()); @@ -570,8 +570,8 @@ static SmallVector<Value> getCollapsedIndices(RewriterBase &rewriter, collapsedOffset = affine::makeComposedFoldedAffineApply( rewriter, loc, collapsedExpr, collapsedVals); - if (collapsedOffset.is<Value>()) { - indicesAfterCollapsing.push_back(collapsedOffset.get<Value>()); + if (auto value = dyn_cast<Value>(collapsedOffset)) { + indicesAfterCollapsing.push_back(value); } else { indicesAfterCollapsing.push_back(rewriter.create<arith::ConstantIndexOp>( loc, *getConstantIntValue(collapsedOffset))); @@ -841,8 +841,8 @@ class RewriteScalarExtractElementOfTransferRead OpFoldResult ofr = affine::makeComposedFoldedAffineApply( rewriter, loc, sym0 + sym1, {newIndices[newIndices.size() - 1], extractOp.getPosition()}); - if (ofr.is<Value>()) { - newIndices[newIndices.size() - 1] = ofr.get<Value>(); + if (auto value = dyn_cast<Value>(ofr)) { + newIndices[newIndices.size() - 1] = value; } else { newIndices[newIndices.size() - 1] = rewriter.create<arith::ConstantIndexOp>(loc, @@ -879,14 +879,14 @@ class RewriteScalarExtractOfTransferRead SmallVector<Value> newIndices(xferOp.getIndices().begin(), xferOp.getIndices().end()); for (auto [i, pos] : llvm::enumerate(extractOp.getMixedPosition())) { - assert(pos.is<Attribute>() && "Unexpected non-constant index"); - int64_t offset = cast<IntegerAttr>(pos.get<Attribute>()).getInt(); + assert(isa<Attribute>(pos) && "Unexpected non-constant index"); + int64_t offset = cast<IntegerAttr>(cast<Attribute>(pos)).getInt(); int64_t idx = newIndices.size() - extractOp.getNumIndices() + i; OpFoldResult ofr = affine::makeComposedFoldedAffineApply( rewriter, extractOp.getLoc(), rewriter.getAffineSymbolExpr(0) + offset, {newIndices[idx]}); - if (ofr.is<Value>()) { - newIndices[idx] = ofr.get<Value>(); + if (auto value = dyn_cast<Value>(ofr)) { + newIndices[idx] = value; } else { newIndices[idx] = rewriter.create<arith::ConstantIndexOp>( extractOp.getLoc(), *getConstantIntValue(ofr)); diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index 20cd9cb..21ec718e 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -598,9 +598,9 @@ struct BubbleDownVectorBitCastForExtract // Get the first element of the mixed position as integer. auto mixedPos = extractOp.getMixedPosition(); - if (mixedPos.size() > 0 && !mixedPos[0].is<Attribute>()) + if (mixedPos.size() > 0 && !isa<Attribute>(mixedPos[0])) return failure(); - uint64_t index = cast<IntegerAttr>(mixedPos[0].get<Attribute>()).getInt(); + uint64_t index = cast<IntegerAttr>(cast<Attribute>(mixedPos[0])).getInt(); // Get the single scalar (as a vector) in the source value that packs the // desired scalar. E.g. extract vector<1xf32> from vector<4xf32> diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 2291d64..59df0cd 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -1385,7 +1385,7 @@ LogicalResult SimpleAffineExprFlattener::visitModExpr(AffineBinaryOpExpr expr) { lhs[getLocalVarStartIndex() + numLocals - 1] = -rhsConst; } else { // Reuse the existing local id. - lhs[getLocalVarStartIndex() + loc] = -rhsConst; + lhs[getLocalVarStartIndex() + loc] -= rhsConst; } return success(); } diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp index f71dd0b..e5cfc07 100644 --- a/mlir/lib/TableGen/Predicate.cpp +++ b/mlir/lib/TableGen/Predicate.cpp @@ -235,6 +235,16 @@ propagateGroundTruth(PredNode *node, return node; } + if (node->kind == PredCombinerKind::And && node->children.empty()) { + node->kind = PredCombinerKind::True; + return node; + } + + if (node->kind == PredCombinerKind::Or && node->children.empty()) { + node->kind = PredCombinerKind::False; + return node; + } + // Otherwise, look at child nodes. // Move child nodes into some local variable so that they can be optimized diff --git a/mlir/lib/TableGen/Property.cpp b/mlir/lib/TableGen/Property.cpp index b86b87df..1385116 100644 --- a/mlir/lib/TableGen/Property.cpp +++ b/mlir/lib/TableGen/Property.cpp @@ -14,6 +14,7 @@ #include "mlir/TableGen/Property.h" #include "mlir/TableGen/Format.h" #include "mlir/TableGen/Operator.h" +#include "mlir/TableGen/Predicate.h" #include "llvm/TableGen/Record.h" using namespace mlir; @@ -68,8 +69,8 @@ Property::Property(StringRef summary, StringRef description, StringRef writeToMlirBytecodeCall, StringRef hashPropertyCall, StringRef defaultValue, StringRef storageTypeValueOverride) - : summary(summary), description(description), storageType(storageType), - interfaceType(interfaceType), + : def(nullptr), summary(summary), description(description), + storageType(storageType), interfaceType(interfaceType), convertFromStorageCall(convertFromStorageCall), assignToStorageCall(assignToStorageCall), convertToAttributeCall(convertToAttributeCall), @@ -91,6 +92,15 @@ StringRef Property::getPropertyDefName() const { return def->getName(); } +Pred Property::getPredicate() const { + if (!def) + return Pred(); + const llvm::RecordVal *maybePred = def->getValue("predicate"); + if (!maybePred || !maybePred->getValue()) + return Pred(); + return Pred(maybePred->getValue()); +} + Property Property::getBaseProperty() const { if (const auto *defInit = llvm::dyn_cast<llvm::DefInit>(def->getValueInit("baseProperty"))) { diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir index 2ba8246..9150986 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir @@ -55,6 +55,7 @@ func.func @buffer_forwarding_conflict_with_different_element_type(%arg0: tensor< // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"] %cst = arith.constant 0.000000e+00 : f32 + // CHECK: bufferization.alloc_tensor(%arg1) %0 = tensor.empty(%arg1) : tensor<?xf32> // CHECK: bufferization.alloc_tensor(%arg1) diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir index efe59af..2643477 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir @@ -365,3 +365,103 @@ func.func @multiple_materialize_in_destination_buffer(%m: memref<5xf32>, %f: f32 bufferization.materialize_in_destination %selected in restrict writable %m : (tensor<5xf32>, memref<5xf32>) -> () return } + +// ----- + +// `EmptyTensorElimination` fails to find a valid insertion +// point for the new injected `SubsetExtraction`. +// CHECK-LABEL: func.func @fail_to_eliminate_any_empty_tensors +func.func @fail_to_eliminate_any_empty_tensors() -> tensor<5x6x128xf32> { + %cst_1 = arith.constant 1.0 : f32 + %cst_2 = arith.constant 2.0 : f32 + // CHECK: memref.alloc + // CHECK: memref.alloc + // CHECK: memref.alloc + %empty_1 = tensor.empty() : tensor<5x6x64xf32> + %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + %empty_2 = tensor.empty() : tensor<5x6x64xf32> + %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32> + // CHECK: memref.copy + %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + return %inserted_slice_2 : tensor<5x6x128xf32> +} + +// ----- + +// CHECK-LABEL: func.func @succeed_to_eliminate_one_empty_tensor +func.func @succeed_to_eliminate_one_empty_tensor() -> tensor<5x6x128xf32> { + %cst_1 = arith.constant 1.0 : f32 + %cst_2 = arith.constant 2.0 : f32 + // CHECK: memref.alloc() {alignment = 64 : i64} : memref<5x6x128xf32> + // CHECK: memref.alloc + // CHECK-NOT: memref.alloc + %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32> + %empty_1 = tensor.empty() : tensor<5x6x64xf32> + %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + %empty_2 = tensor.empty() : tensor<5x6x64xf32> + %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + // CHECK: memref.copy + %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + return %inserted_slice_2 : tensor<5x6x128xf32> +} + +// ----- + +// `EmptyTensorElimination` will replace the specific use of the tensor +// empty with the new injected `SubsetExtraction`, i.e. the specific use +// which has been tracked. + +// CHECK-ELIM-LABEL: func.func @mutli_use_of_the_same_tensor_empty +// CHECK-LABEL: func.func @mutli_use_of_the_same_tensor_empty +func.func @mutli_use_of_the_same_tensor_empty() -> tensor<5x6x128xf32> { + %cst_1 = arith.constant 1.0 : f32 + %cst_2 = arith.constant 2.0 : f32 + %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32> + %empty_1 = tensor.empty() : tensor<5x6x64xf32> + // CHECK-ELIM: %[[VAL_3:.*]] = tensor.extract_slice + // CHECK-ELIM: linalg.fill ins(%[[VAL_0:.*]] : f32) outs(%[[VAL_3]] + // CHECK-ELIM-NOT: linalg.fill ins(%[[VAL_1:.*]] : f32) outs(%[[VAL_3]] + %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + // CHECK: memref.copy + %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + // CHECK-NOT: memref.copy + %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + return %inserted_slice_2 : tensor<5x6x128xf32> +} + +// ----- + +// CHECK-LABEL: func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read +// CHECK-ELIM-LABEL: func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read +func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read(%arg1: tensor<5x6x128xf32> , %arg2: tensor<5x6x64xf32>) + -> (tensor<5x6x128xf32>, tensor<5x6x64xf32>) { + %cst_1 = arith.constant 1.0 : f32 + %empty_1 = tensor.empty() : tensor<5x6x64xf32> + // CHECK: memref.alloc() {alignment = 64 : i64} : memref<5x6x64xf32> + // CHECK-NOT: memref.alloc + %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32> + %res_2 = linalg.generic{ + indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>], + iterator_types = ["parallel", "parallel", "parallel"] + } + ins(%empty_1 : tensor<5x6x64xf32>) + outs(%arg2 :tensor<5x6x64xf32>) { + ^bb0(%in: f32, %out: f32): + %res = arith.addf %in, %in : f32 + linalg.yield %res : f32 + } -> tensor<5x6x64xf32> + // CHECK-NOT: memref.copy + %inserted_slice_1 = tensor.insert_slice %res_1 into %arg1[0, 0, 0][5, 6, 64][1, 1, 1] + : tensor<5x6x64xf32> into tensor<5x6x128xf32> + return %inserted_slice_1, %res_2 : tensor<5x6x128xf32>, tensor<5x6x64xf32> +} diff --git a/mlir/test/IR/test-op-property-predicates.mlir b/mlir/test/IR/test-op-property-predicates.mlir new file mode 100644 index 0000000..3a4e297 --- /dev/null +++ b/mlir/test/IR/test-op-property-predicates.mlir @@ -0,0 +1,148 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s + +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +test.op_with_property_predicates <{ + scalar = 1 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'scalar' failed to satisfy constraint: non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = -1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'optional' failed to satisfy constraint: optional non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [-1 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'defaulted' failed to satisfy constraint: non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = -1 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'more_constrained' failed to satisfy constraint: between 0 and 5}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 100 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'array' failed to satisfy constraint: array of non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [-1 : i64], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_unconstrained' failed to satisfy constraint: non-empty array of int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [], + non_empty_constrained = [6 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [-1 : i64], + non_empty_optional = [[7 : i64]], + unconstrained = 8 : i64}> + +// ----- + +// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_optional' failed to satisfy constraint: optional non-empty array of non-negative int64_t}} +test.op_with_property_predicates <{ + scalar = 1 : i64, + optional = [2 : i64], + defaulted = 3 : i64, + more_constrained = 4 : i64, + array = [], + non_empty_unconstrained = [5: i64], + non_empty_constrained = [6 : i64], + non_empty_optional = [[]], + unconstrained = 8 : i64}> diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index d24d52f..384139d 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -2998,7 +2998,7 @@ def TestOpWithProperties : TEST_Op<"with_properties"> { StrAttr:$b, // Attributes can directly be used here. StringProperty:$c, BoolProperty:$flag, - IntArrayProperty<"int64_t">:$array // example of an array + IntArrayProperty<I64Property>:$array // example of an array ); } @@ -3040,15 +3040,15 @@ def TestOpWithEmptyProperties : TEST_Op<"empty_properties"> { def TestOpUsingPropertyInCustom : TEST_Op<"using_property_in_custom"> { let assemblyFormat = "custom<UsingPropertyInCustom>($prop) attr-dict"; - let arguments = (ins IntArrayProperty<"int64_t">:$prop); + let arguments = (ins IntArrayProperty<I64Property>:$prop); } def TestOpUsingPropertyInCustomAndOther : TEST_Op<"using_property_in_custom_and_other"> { let assemblyFormat = "custom<UsingPropertyInCustom>($prop) prop-dict attr-dict"; let arguments = (ins - IntArrayProperty<"int64_t">:$prop, - IntProperty<"int64_t">:$other + IntArrayProperty<I64Property>:$prop, + I64Property:$other ); } @@ -3253,6 +3253,30 @@ def TestOpWithArrayProperties : TEST_Op<"with_array_properties"> { ); } +def NonNegativeI64Property : ConfinedProperty<I64Property, + CPred<"$_self >= 0">, "non-negative int64_t">; + +class NonEmptyArray<Property p> : ConfinedProperty + <ArrayProperty<p>, Neg<CPred<"$_self.empty()">>, + "non-empty array of " # p.summary>; + +def OpWithPropertyPredicates : TEST_Op<"op_with_property_predicates"> { + let arguments = (ins + NonNegativeI64Property:$scalar, + OptionalProperty<NonNegativeI64Property>:$optional, + DefaultValuedProperty<NonNegativeI64Property, "0">:$defaulted, + ConfinedProperty<NonNegativeI64Property, + CPred<"$_self <= 5">, "between 0 and 5">:$more_constrained, + ArrayProperty<NonNegativeI64Property>:$array, + NonEmptyArray<I64Property>:$non_empty_unconstrained, + NonEmptyArray<NonNegativeI64Property>:$non_empty_constrained, + // Test applying predicates when the fromStorage() on the optional<> isn't trivial. + OptionalProperty<NonEmptyArray<NonNegativeI64Property>>:$non_empty_optional, + I64Property:$unconstrained + ); + let assemblyFormat = "attr-dict prop-dict"; +} + //===----------------------------------------------------------------------===// // Test Dataflow //===----------------------------------------------------------------------===// diff --git a/mlir/test/mlir-tblgen/op-properties-predicates.td b/mlir/test/mlir-tblgen/op-properties-predicates.td new file mode 100644 index 0000000..fa06e14 --- /dev/null +++ b/mlir/test/mlir-tblgen/op-properties-predicates.td @@ -0,0 +1,73 @@ +// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s + +include "mlir/IR/Constraints.td" +include "mlir/IR/EnumAttr.td" +include "mlir/IR/OpBase.td" +include "mlir/IR/Properties.td" + +def Test_Dialect : Dialect { + let name = "test"; + let cppNamespace = "foobar"; +} +class NS_Op<string mnemonic, list<Trait> traits = []> : + Op<Test_Dialect, mnemonic, traits>; + +def NonNegativeI64Property : ConfinedProperty<I64Property, + CPred<"$_self >= 0">, "non-negative int64_t">; + +class NonEmptyArray<Property p> : ConfinedProperty + <ArrayProperty<p>, Neg<CPred<"$_self.empty()">>, + "non-empty array of " # p.summary>; + +def OpWithPredicates : NS_Op<"op_with_predicates"> { + let arguments = (ins + NonNegativeI64Property:$scalar, + OptionalProperty<NonNegativeI64Property>:$optional, + DefaultValuedProperty<NonNegativeI64Property, "0">:$defaulted, + ConfinedProperty<NonNegativeI64Property, + CPred<"$_self <= 5">, "between 0 and 5">:$moreConstrained, + ArrayProperty<NonNegativeI64Property>:$array, + NonEmptyArray<I64Property>:$non_empty_unconstrained, + NonEmptyArray<NonNegativeI64Property>:$non_empty_constrained, + // Test applying predicates when the fromStorage() on the optional<> isn't trivial. + OptionalProperty<NonEmptyArray<NonNegativeI64Property>>:$non_empty_optional, + I64Property:$unconstrained + ); +} + +// CHECK-LABEL: OpWithPredicates::verifyInvariantsImpl() +// Note: for test readibility, we capture [[maybe_unused]] into the variable maybe_unused +// CHECK: [[maybe_unused:\[\[maybe_unused\]\]]] int64_t tblgen_scalar = this->getScalar(); +// CHECK: if (!((tblgen_scalar >= 0))) +// CHECK-NEXT: return emitOpError("property 'scalar' failed to satisfy constraint: non-negative int64_t"); + +// CHECK: [[maybe_unused]] std::optional<int64_t> tblgen_optional = this->getOptional(); +// CHECK: if (!(((!tblgen_optional.has_value())) || (((*(tblgen_optional)) >= 0)))) +// CHECK-NEXT: return emitOpError("property 'optional' failed to satisfy constraint: optional non-negative int64_t"); + +// CHECK: [[maybe_unused]] int64_t tblgen_defaulted = this->getDefaulted(); +// CHECK: if (!((tblgen_defaulted >= 0))) +// CHECK-NEXT: return emitOpError("property 'defaulted' failed to satisfy constraint: non-negative int64_t"); + +// CHECK: [[maybe_unused]] int64_t tblgen_moreConstrained = this->getMoreConstrained(); +// CHECK: if (!(((tblgen_moreConstrained >= 0)) && ((tblgen_moreConstrained <= 5)))) +// CHECK-NEXT: return emitOpError("property 'moreConstrained' failed to satisfy constraint: between 0 and 5"); + +// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_array = this->getArray(); +// CHECK: if (!(::llvm::all_of(tblgen_array, [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); }))) +// CHECK-NEXT: return emitOpError("property 'array' failed to satisfy constraint: array of non-negative int64_t"); + +// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_nonEmptyUnconstrained = this->getNonEmptyUnconstrained(); +// CHECK: if (!(!((tblgen_nonEmptyUnconstrained.empty())))) +// CHECK-NEXT: return emitOpError("property 'non_empty_unconstrained' failed to satisfy constraint: non-empty array of int64_t"); + +// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_nonEmptyConstrained = this->getNonEmptyConstrained(); +// CHECK: if (!((::llvm::all_of(tblgen_nonEmptyConstrained, [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); })) && (!((tblgen_nonEmptyConstrained.empty()))))) +// CHECK-NEXT: return emitOpError("property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t"); + +// CHECK: [[maybe_unused]] std::optional<::llvm::ArrayRef<int64_t>> tblgen_nonEmptyOptional = this->getNonEmptyOptional(); +// CHECK: (!(((!tblgen_nonEmptyOptional.has_value())) || ((::llvm::all_of((*(tblgen_nonEmptyOptional)), [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); })) && (!(((*(tblgen_nonEmptyOptional)).empty())))))) +// CHECK-NEXT: return emitOpError("property 'non_empty_optional' failed to satisfy constraint: optional non-empty array of non-negative int64_t"); + +// CHECK-NOT: int64_t tblgen_unconstrained +// CHECK: return ::mlir::success(); diff --git a/mlir/test/mlir-tblgen/op-properties.td b/mlir/test/mlir-tblgen/op-properties.td index 918583c..bf32ce4 100644 --- a/mlir/test/mlir-tblgen/op-properties.td +++ b/mlir/test/mlir-tblgen/op-properties.td @@ -20,7 +20,7 @@ def OpWithAttr : NS_Op<"op_with_attr">{ // Test required and optional properties // --- -def DefaultI64Array : IntArrayProperty<"int64_t"> { +def DefaultI64Array : IntArrayProperty<I64Property> { let defaultValue = "::llvm::ArrayRef<int64_t>{}"; let storageTypeValueOverride = "::llvm::SmallVector<int64_t>{}"; } diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 30a2d4d..a970cbc 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1033,6 +1033,61 @@ while (true) {{ emitVerifier(namedAttr.attr, namedAttr.name, getVarName(namedAttr.name)); } +static void genPropertyVerifier(const OpOrAdaptorHelper &emitHelper, + FmtContext &ctx, MethodBody &body) { + + // Code to get a reference to a property into a variable to avoid multiple + // evaluations while verifying a property. + // {0}: Property variable name. + // {1}: Property name, with the first letter capitalized, to find the getter. + // {2}: Property interface type. + const char *const fetchProperty = R"( + [[maybe_unused]] {2} {0} = this->get{1}(); +)"; + + // Code to verify that the predicate of a property holds. Embeds the + // condition inline. + // {0}: Property condition code, with tgfmt() applied. + // {1}: Emit error prefix. + // {2}: Property name. + // {3}: Property description. + const char *const verifyProperty = R"( + if (!({0})) + return {1}"property '{2}' failed to satisfy constraint: {3}"); +)"; + + // Prefix variables with `tblgen_` to avoid hiding the attribute accessor. + const auto getVarName = [&](const NamedProperty &prop) { + std::string varName = + convertToCamelFromSnakeCase(prop.name, /*capitalizeFirst=*/false); + return (tblgenNamePrefix + Twine(varName)).str(); + }; + + for (const NamedProperty &prop : emitHelper.getOp().getProperties()) { + Pred predicate = prop.prop.getPredicate(); + // Null predicate, nothing to verify. + if (predicate == Pred()) + continue; + + std::string rawCondition = predicate.getCondition(); + if (rawCondition == "true") + continue; + bool needsOp = StringRef(rawCondition).contains("$_op"); + if (needsOp && !emitHelper.isEmittingForOp()) + continue; + + auto scope = body.scope("{\n", "}\n", /*indent=*/true); + std::string varName = getVarName(prop); + std::string getterName = + convertToCamelFromSnakeCase(prop.name, /*capitalizeFirst=*/true); + body << formatv(fetchProperty, varName, getterName, + prop.prop.getInterfaceType()); + body << formatv(verifyProperty, tgfmt(rawCondition, &ctx.withSelf(varName)), + emitHelper.emitErrorPrefix(), prop.name, + prop.prop.getSummary()); + } +} + /// Include declarations specified on NativeTrait static std::string formatExtraDeclarations(const Operator &op) { SmallVector<StringRef> extraDeclarations; @@ -3726,6 +3781,7 @@ void OpEmitter::genVerifier() { bool useProperties = emitHelper.hasProperties(); populateSubstitutions(emitHelper, verifyCtx); + genPropertyVerifier(emitHelper, verifyCtx, implBody); genAttributeVerifier(emitHelper, verifyCtx, implBody, staticVerifierEmitter, useProperties); genOperandResultVerifier(implBody, op.getOperands(), "operand"); diff --git a/mlir/unittests/IR/AffineExprTest.cpp b/mlir/unittests/IR/AffineExprTest.cpp index 9e89a5b..8a2d697 100644 --- a/mlir/unittests/IR/AffineExprTest.cpp +++ b/mlir/unittests/IR/AffineExprTest.cpp @@ -129,3 +129,21 @@ TEST(AffineExprTest, d0PlusD0FloorDivNeg2) { auto sum = d0 + d0.floorDiv(-2) * 2; ASSERT_EQ(toString(sum), "d0 + (d0 floordiv -2) * 2"); } + +TEST(AffineExprTest, simpleAffineExprFlattenerRegression) { + + // Regression test for a bug where mod simplification was not handled + // properly when `lhs % rhs` was happened to have the property that `lhs + // floordiv rhs = lhs`. + MLIRContext ctx; + OpBuilder b(&ctx); + + auto d0 = b.getAffineDimExpr(0); + + // Manually replace variables by constants to avoid constant folding. + AffineExpr expr = (d0 - (d0 + 2)).floorDiv(8) % 8; + AffineExpr result = mlir::simplifyAffineExpr(expr, 1, 0); + + ASSERT_TRUE(isa<AffineConstantExpr>(result)); + ASSERT_EQ(cast<AffineConstantExpr>(result).getValue(), 7); +} |