aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThurston Dang <thurston@google.com>2024-12-18 15:53:35 -0800
committerVitaly Buka <vitalybuka@google.com>2024-12-18 15:53:35 -0800
commit014a3c31ce7143093d8d1aafc6866e0c8a05a44b (patch)
tree09ed7378c44e3692dbe6b7ed283fdac33a16e14b
parent97ff599fc6949dc2bac3af7eb3200ebd35990c5a (diff)
parent2691b964150c77a9e6967423383ad14a7693095e (diff)
downloadllvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.zip
llvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.tar.gz
llvm-users/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable.tar.bz2
[𝘀𝗽𝗿] changes introduced through rebaseusers/vitalybuka/spr/main.nfcboundschecking-add-trapbb-local-variable
Created using spr 1.3.4 [skip ci]
-rw-r--r--clang/include/clang-c/Index.h6
-rw-r--r--clang/include/clang/AST/ASTNodeTraverser.h12
-rw-r--r--clang/include/clang/AST/RecursiveASTVisitor.h13
-rw-r--r--clang/include/clang/AST/StmtOpenACC.h123
-rw-r--r--clang/include/clang/AST/TextNodeDumper.h1
-rw-r--r--clang/include/clang/Basic/StmtNodes.td1
-rw-r--r--clang/include/clang/Parse/Parser.h15
-rw-r--r--clang/include/clang/Sema/SemaOpenACC.h18
-rw-r--r--clang/include/clang/Serialization/ASTBitCodes.h1
-rw-r--r--clang/lib/APINotes/APINotesReader.cpp7
-rw-r--r--clang/lib/AST/StmtOpenACC.cpp29
-rw-r--r--clang/lib/AST/StmtPrinter.cpp28
-rw-r--r--clang/lib/AST/StmtProfile.cpp8
-rw-r--r--clang/lib/AST/TextNodeDumper.cpp4
-rw-r--r--clang/lib/CodeGen/CGStmt.cpp3
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h5
-rw-r--r--clang/lib/CodeGen/CoverageMappingGen.cpp21
-rw-r--r--clang/lib/Format/ContinuationIndenter.cpp5
-rw-r--r--clang/lib/Format/TokenAnnotator.cpp6
-rw-r--r--clang/lib/Format/UnwrappedLineParser.cpp5
-rw-r--r--clang/lib/Parse/ParseOpenACC.cpp22
-rw-r--r--clang/lib/Sema/SemaExceptionSpec.cpp1
-rw-r--r--clang/lib/Sema/SemaOpenACC.cpp86
-rw-r--r--clang/lib/Sema/TreeTransform.h86
-rw-r--r--clang/lib/Serialization/ASTReaderStmt.cpp22
-rw-r--r--clang/lib/Serialization/ASTWriterStmt.cpp14
-rw-r--r--clang/lib/StaticAnalyzer/Core/ExprEngine.cpp1
-rw-r--r--clang/test/AST/ast-print-openacc-wait-construct.cpp22
-rw-r--r--clang/test/CoverageMapping/switch.cpp56
-rw-r--r--clang/test/CoverageMapping/switchmacro.c4
-rw-r--r--clang/test/ParserOpenACC/parse-wait-construct.c166
-rw-r--r--clang/test/SemaOpenACC/combined-construct-default-clause.c1
-rw-r--r--clang/test/SemaOpenACC/compute-construct-default-clause.c1
-rw-r--r--clang/test/SemaOpenACC/unimplemented-construct.c4
-rw-r--r--clang/test/SemaOpenACC/wait-construct-ast.cpp225
-rw-r--r--clang/test/SemaOpenACC/wait-construct.cpp95
-rw-r--r--clang/tools/libclang/CIndex.cpp9
-rw-r--r--clang/tools/libclang/CXCursor.cpp3
-rw-r--r--clang/unittests/Format/FormatTest.cpp9
-rw-r--r--clang/unittests/Format/TokenAnnotatorTest.cpp7
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h7
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp85
-rw-r--r--flang/module/cudadevice.f9014
-rw-r--r--flang/runtime/findloc.cpp21
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf28
-rw-r--r--libcxx/include/__algorithm/adjacent_find.h2
-rw-r--r--libcxx/include/__algorithm/binary_search.h1
-rw-r--r--libcxx/include/__algorithm/equal.h1
-rw-r--r--libcxx/include/__algorithm/equal_range.h4
-rw-r--r--libcxx/include/__algorithm/fill_n.h1
-rw-r--r--libcxx/include/__algorithm/find.h1
-rw-r--r--libcxx/include/__algorithm/find_end.h1
-rw-r--r--libcxx/include/__algorithm/find_first_of.h1
-rw-r--r--libcxx/include/__algorithm/for_each.h1
-rw-r--r--libcxx/include/__algorithm/includes.h1
-rw-r--r--libcxx/include/__algorithm/inplace_merge.h2
-rw-r--r--libcxx/include/__algorithm/is_heap.h1
-rw-r--r--libcxx/include/__algorithm/is_permutation.h1
-rw-r--r--libcxx/include/__algorithm/is_sorted.h1
-rw-r--r--libcxx/include/__algorithm/is_sorted_until.h1
-rw-r--r--libcxx/include/__algorithm/lower_bound.h1
-rw-r--r--libcxx/include/__algorithm/make_projected.h3
-rw-r--r--libcxx/include/__algorithm/merge.h1
-rw-r--r--libcxx/include/__algorithm/mismatch.h1
-rw-r--r--libcxx/include/__bit_reference6
-rw-r--r--libcxx/include/__hash_table4
-rw-r--r--libcxx/include/__split_buffer2
-rw-r--r--libcxx/include/__tree2
-rw-r--r--llvm/include/llvm/ProfileData/MemProf.h39
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMapping.cpp2
-rw-r--r--llvm/lib/ProfileData/MemProf.cpp2
-rw-r--r--llvm/lib/Support/Unix/Threading.inc2
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td11
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td21
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td10
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td17
-rw-r--r--llvm/lib/Target/DirectX/DXILDataScalarization.cpp101
-rw-r--r--llvm/lib/Target/DirectX/DXILFlattenArrays.cpp42
-rw-r--r--llvm/lib/Target/DirectX/DXILOpLowering.cpp8
-rw-r--r--llvm/lib/Target/DirectX/DXILShaderFlags.cpp54
-rw-r--r--llvm/lib/Target/DirectX/DXILShaderFlags.h7
-rw-r--r--llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp1
-rw-r--r--llvm/lib/Target/DirectX/DirectXTargetMachine.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp7
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp48
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp17
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h19
-rw-r--r--llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir6
-rw-r--r--llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir6
-rw-r--r--llvm/test/CodeGen/DirectX/CreateHandle.ll2
-rw-r--r--llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll2
-rw-r--r--llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll44
-rw-r--r--llvm/test/CodeGen/DirectX/flatten-bug-117273.ll23
-rw-r--r--llvm/test/CodeGen/DirectX/llc-pipeline.ll23
-rw-r--r--llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll29
-rw-r--r--llvm/test/CodeGen/DirectX/scalar-bug-117273.ll25
-rw-r--r--llvm/test/CodeGen/DirectX/scalar-load.ll28
-rw-r--r--llvm/test/MC/AMDGPU/dl-insts.s192
-rw-r--r--llvm/test/MC/AMDGPU/gfx1030_err.s362
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3.s276
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s455
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s336
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s219
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3.s112
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s111
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s103
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s218
-rw-r--r--llvm/test/MC/AMDGPU/gfx908_err.s436
-rw-r--r--llvm/test/MC/AMDGPU/gfx90a_err.s435
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt370
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt476
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt356
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt143
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt269
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt169
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt276
-rw-r--r--llvm/test/ThinLTO/X86/memprof-missing-callsite.ll70
-rw-r--r--llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll32
-rw-r--r--llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll10
-rw-r--r--llvm/test/Transforms/PGOProfile/memprof-undrift.test174
-rw-r--r--llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll51
-rw-r--r--llvm/test/tools/llvm-cov/branch-macros.cpp2
-rw-r--r--llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s59
-rw-r--r--llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s48
-rw-r--r--llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s11
-rw-r--r--llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg3
-rw-r--r--llvm/tools/llvm-exegesis/lib/CMakeLists.txt3
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp18
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.h11
-rw-r--r--llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt22
-rw-r--r--llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp272
-rw-r--r--llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp17
-rw-r--r--llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp12
-rw-r--r--llvm/tools/llvm-exegesis/llvm-exegesis.cpp33
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp9
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanTest.cpp24
-rw-r--r--llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp18
-rw-r--r--llvm/utils/gn/secondary/lld/Common/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn1
-rw-r--r--llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn2
-rw-r--r--mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h6
-rw-r--r--mlir/include/mlir/IR/Constraints.td6
-rw-r--r--mlir/include/mlir/IR/Properties.td133
-rw-r--r--mlir/include/mlir/TableGen/Property.h5
-rw-r--r--mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp8
-rw-r--r--mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp56
-rw-r--r--mlir/lib/Dialect/Vector/IR/VectorOps.cpp10
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp18
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp4
-rw-r--r--mlir/lib/IR/AffineExpr.cpp2
-rw-r--r--mlir/lib/TableGen/Predicate.cpp10
-rw-r--r--mlir/lib/TableGen/Property.cpp14
-rw-r--r--mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir1
-rw-r--r--mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir100
-rw-r--r--mlir/test/IR/test-op-property-predicates.mlir148
-rw-r--r--mlir/test/lib/Dialect/Test/TestOps.td32
-rw-r--r--mlir/test/mlir-tblgen/op-properties-predicates.td73
-rw-r--r--mlir/test/mlir-tblgen/op-properties.td2
-rw-r--r--mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp56
-rw-r--r--mlir/unittests/IR/AffineExprTest.cpp18
162 files changed, 6993 insertions, 1870 deletions
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 29858f0..122118b 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2186,7 +2186,11 @@ enum CXCursorKind {
*/
CXCursor_OpenACCHostDataConstruct = 326,
- CXCursor_LastStmt = CXCursor_OpenACCHostDataConstruct,
+ /** OpenACC wait Construct.
+ */
+ CXCursor_OpenACCWaitConstruct = 327,
+
+ CXCursor_LastStmt = CXCursor_OpenACCWaitConstruct,
/**
* Cursor that represents the translation unit itself.
diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h
index 3d63d58..f5652b2 100644
--- a/clang/include/clang/AST/ASTNodeTraverser.h
+++ b/clang/include/clang/AST/ASTNodeTraverser.h
@@ -159,7 +159,7 @@ public:
// Some statements have custom mechanisms for dumping their children.
if (isa<DeclStmt>(S) || isa<GenericSelectionExpr>(S) ||
- isa<RequiresExpr>(S))
+ isa<RequiresExpr>(S) || isa<OpenACCWaitConstruct>(S))
return;
if (Traversal == TK_IgnoreUnlessSpelledInSource &&
@@ -825,6 +825,16 @@ public:
Visit(C);
}
+ void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *Node) {
+ // Needs custom child checking to put clauses AFTER the children, which are
+ // the expressions in the 'wait' construct. Others likely need this as well,
+ // and might need to do the associated statement after it.
+ for (const Stmt *S : Node->children())
+ Visit(S);
+ for (const auto *C : Node->clauses())
+ Visit(C);
+ }
+
void VisitInitListExpr(const InitListExpr *ILE) {
if (auto *Filler = ILE->getArrayFiller()) {
Visit(Filler, "array_filler");
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 5d5c91f..d9a87b3 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -4063,10 +4063,19 @@ DEF_TRAVERSE_STMT(OpenACCCombinedConstruct,
{ TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); })
DEF_TRAVERSE_STMT(OpenACCDataConstruct,
{ TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); })
-DEF_TRAVERSE_STMT(OpenACCEnterDataConstruct, {})
-DEF_TRAVERSE_STMT(OpenACCExitDataConstruct, {})
+DEF_TRAVERSE_STMT(OpenACCEnterDataConstruct,
+ { TRY_TO(VisitOpenACCClauseList(S->clauses())); })
+DEF_TRAVERSE_STMT(OpenACCExitDataConstruct,
+ { TRY_TO(VisitOpenACCClauseList(S->clauses())); })
DEF_TRAVERSE_STMT(OpenACCHostDataConstruct,
{ TRY_TO(TraverseOpenACCAssociatedStmtConstruct(S)); })
+DEF_TRAVERSE_STMT(OpenACCWaitConstruct, {
+ if (S->hasDevNumExpr())
+ TRY_TO(TraverseStmt(S->getDevNumExpr()));
+ for (auto *E : S->getQueueIdExprs())
+ TRY_TO(TraverseStmt(E));
+ TRY_TO(VisitOpenACCClauseList(S->clauses()));
+})
// Traverse HLSL: Out argument expression
DEF_TRAVERSE_STMT(HLSLOutArgExpr, {})
diff --git a/clang/include/clang/AST/StmtOpenACC.h b/clang/include/clang/AST/StmtOpenACC.h
index a1903cd..093393a 100644
--- a/clang/include/clang/AST/StmtOpenACC.h
+++ b/clang/include/clang/AST/StmtOpenACC.h
@@ -469,5 +469,128 @@ public:
return const_cast<OpenACCHostDataConstruct *>(this)->getStructuredBlock();
}
};
+
+// This class represents a 'wait' construct, which has some expressions plus a
+// clause list.
+class OpenACCWaitConstruct final
+ : public OpenACCConstructStmt,
+ private llvm::TrailingObjects<OpenACCWaitConstruct, Expr *,
+ OpenACCClause *> {
+ // FIXME: We should be storing a `const OpenACCClause *` to be consistent with
+ // the rest of the constructs, but TrailingObjects doesn't allow for mixing
+ // constness in its implementation of `getTrailingObjects`.
+
+ friend TrailingObjects;
+ friend class ASTStmtWriter;
+ friend class ASTStmtReader;
+ // Locations of the left and right parens of the 'wait-argument'
+ // expression-list.
+ SourceLocation LParenLoc, RParenLoc;
+ // Location of the 'queues' keyword, if present.
+ SourceLocation QueuesLoc;
+
+ // Number of the expressions being represented. Index '0' is always the
+ // 'devnum' expression, even if it not present.
+ unsigned NumExprs = 0;
+
+ OpenACCWaitConstruct(unsigned NumExprs, unsigned NumClauses)
+ : OpenACCConstructStmt(OpenACCWaitConstructClass,
+ OpenACCDirectiveKind::Wait, SourceLocation{},
+ SourceLocation{}, SourceLocation{}),
+ NumExprs(NumExprs) {
+ assert(NumExprs >= 1 &&
+ "NumExprs should always be >= 1 because the 'devnum' "
+ "expr is represented by a null if necessary");
+ std::uninitialized_value_construct(getExprPtr(),
+ getExprPtr() + NumExprs);
+ std::uninitialized_value_construct(getTrailingObjects<OpenACCClause *>(),
+ getTrailingObjects<OpenACCClause *>() +
+ NumClauses);
+ setClauseList(MutableArrayRef(const_cast<const OpenACCClause **>(
+ getTrailingObjects<OpenACCClause *>()),
+ NumClauses));
+ }
+
+ OpenACCWaitConstruct(SourceLocation Start, SourceLocation DirectiveLoc,
+ SourceLocation LParenLoc, Expr *DevNumExpr,
+ SourceLocation QueuesLoc, ArrayRef<Expr *> QueueIdExprs,
+ SourceLocation RParenLoc, SourceLocation End,
+ ArrayRef<const OpenACCClause *> Clauses)
+ : OpenACCConstructStmt(OpenACCWaitConstructClass,
+ OpenACCDirectiveKind::Wait, Start, DirectiveLoc,
+ End),
+ LParenLoc(LParenLoc), RParenLoc(RParenLoc), QueuesLoc(QueuesLoc),
+ NumExprs(QueueIdExprs.size() + 1) {
+ assert(NumExprs >= 1 &&
+ "NumExprs should always be >= 1 because the 'devnum' "
+ "expr is represented by a null if necessary");
+
+ std::uninitialized_copy(&DevNumExpr, &DevNumExpr + 1,
+ getExprPtr());
+ std::uninitialized_copy(QueueIdExprs.begin(), QueueIdExprs.end(),
+ getExprPtr() + 1);
+
+ std::uninitialized_copy(const_cast<OpenACCClause **>(Clauses.begin()),
+ const_cast<OpenACCClause **>(Clauses.end()),
+ getTrailingObjects<OpenACCClause *>());
+ setClauseList(MutableArrayRef(const_cast<const OpenACCClause **>(
+ getTrailingObjects<OpenACCClause *>()),
+ Clauses.size()));
+ }
+
+ size_t numTrailingObjects(OverloadToken<Expr *>) const { return NumExprs; }
+ size_t numTrailingObjects(OverloadToken<const OpenACCClause *>) const {
+ return clauses().size();
+ }
+
+ Expr **getExprPtr() const {
+ return const_cast<Expr**>(getTrailingObjects<Expr *>());
+ }
+
+ llvm::ArrayRef<Expr *> getExprs() const {
+ return llvm::ArrayRef<Expr *>(getExprPtr(), NumExprs);
+ }
+
+ llvm::ArrayRef<Expr *> getExprs() {
+ return llvm::ArrayRef<Expr *>(getExprPtr(), NumExprs);
+ }
+
+public:
+ static bool classof(const Stmt *T) {
+ return T->getStmtClass() == OpenACCWaitConstructClass;
+ }
+
+ static OpenACCWaitConstruct *
+ CreateEmpty(const ASTContext &C, unsigned NumExprs, unsigned NumClauses);
+
+ static OpenACCWaitConstruct *
+ Create(const ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc,
+ SourceLocation LParenLoc, Expr *DevNumExpr, SourceLocation QueuesLoc,
+ ArrayRef<Expr *> QueueIdExprs, SourceLocation RParenLoc,
+ SourceLocation End, ArrayRef<const OpenACCClause *> Clauses);
+
+ SourceLocation getLParenLoc() const { return LParenLoc; }
+ SourceLocation getRParenLoc() const { return RParenLoc; }
+ bool hasQueuesTag() const { return !QueuesLoc.isInvalid(); }
+ SourceLocation getQueuesLoc() const { return QueuesLoc; }
+
+ bool hasDevNumExpr() const { return getExprs()[0]; }
+ Expr *getDevNumExpr() const { return getExprs()[0]; }
+ llvm::ArrayRef<Expr *> getQueueIdExprs() { return getExprs().drop_front(); }
+ llvm::ArrayRef<Expr *> getQueueIdExprs() const {
+ return getExprs().drop_front();
+ }
+
+ child_range children() {
+ Stmt **Begin = reinterpret_cast<Stmt **>(getExprPtr());
+ return child_range(Begin, Begin + NumExprs);
+ }
+
+ const_child_range children() const {
+ Stmt *const *Begin =
+ reinterpret_cast<Stmt *const *>(getExprPtr());
+ return const_child_range(Begin, Begin + NumExprs);
+ }
+};
} // namespace clang
#endif // LLVM_CLANG_AST_STMTOPENACC_H
diff --git a/clang/include/clang/AST/TextNodeDumper.h b/clang/include/clang/AST/TextNodeDumper.h
index e54e7e5..b6f16be 100644
--- a/clang/include/clang/AST/TextNodeDumper.h
+++ b/clang/include/clang/AST/TextNodeDumper.h
@@ -415,6 +415,7 @@ public:
void VisitOpenACCEnterDataConstruct(const OpenACCEnterDataConstruct *S);
void VisitOpenACCExitDataConstruct(const OpenACCExitDataConstruct *S);
void VisitOpenACCHostDataConstruct(const OpenACCHostDataConstruct *S);
+ void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S);
void VisitOpenACCAsteriskSizeExpr(const OpenACCAsteriskSizeExpr *S);
void VisitEmbedExpr(const EmbedExpr *S);
void VisitAtomicExpr(const AtomicExpr *AE);
diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 0c3c580..6c7314b 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -312,6 +312,7 @@ def OpenACCDataConstruct : StmtNode<OpenACCAssociatedStmtConstruct>;
def OpenACCEnterDataConstruct : StmtNode<OpenACCConstructStmt>;
def OpenACCExitDataConstruct : StmtNode<OpenACCConstructStmt>;
def OpenACCHostDataConstruct : StmtNode<OpenACCAssociatedStmtConstruct>;
+def OpenACCWaitConstruct : StmtNode<OpenACCConstructStmt>;
// OpenACC Additional Expressions.
def OpenACCAsteriskSizeExpr : StmtNode<Expr>;
diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h
index d3838a4..e99d2cf 100644
--- a/clang/include/clang/Parse/Parser.h
+++ b/clang/include/clang/Parse/Parser.h
@@ -3706,10 +3706,14 @@ private:
OpenACCDirectiveKind DirKind;
SourceLocation StartLoc;
SourceLocation DirLoc;
+ SourceLocation LParenLoc;
+ SourceLocation RParenLoc;
SourceLocation EndLoc;
+ SourceLocation MiscLoc;
+ SmallVector<Expr *> Exprs;
SmallVector<OpenACCClause *> Clauses;
- // TODO OpenACC: As we implement support for the Atomic, Routine, Cache, and
- // Wait constructs, we likely want to put that information in here as well.
+ // TODO OpenACC: As we implement support for the Atomic, Routine, and Cache
+ // constructs, we likely want to put that information in here as well.
};
struct OpenACCWaitParseInfo {
@@ -3717,6 +3721,13 @@ private:
Expr *DevNumExpr = nullptr;
SourceLocation QueuesLoc;
SmallVector<Expr *> QueueIdExprs;
+
+ SmallVector<Expr *> getAllExprs() {
+ SmallVector<Expr *> Out;
+ Out.push_back(DevNumExpr);
+ Out.insert(Out.end(), QueueIdExprs.begin(), QueueIdExprs.end());
+ return Out;
+ }
};
/// Represents the 'error' state of parsing an OpenACC Clause, and stores
diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h
index 4b92af5..04ab1ac 100644
--- a/clang/include/clang/Sema/SemaOpenACC.h
+++ b/clang/include/clang/Sema/SemaOpenACC.h
@@ -679,12 +679,18 @@ public:
/// Called after the directive has been completely parsed, including the
/// declaration group or associated statement.
- StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K,
- SourceLocation StartLoc,
- SourceLocation DirLoc,
- SourceLocation EndLoc,
- ArrayRef<OpenACCClause *> Clauses,
- StmtResult AssocStmt);
+ /// LParenLoc: Location of the left paren, if it exists (not on all
+ /// constructs).
+ /// MiscLoc: First misc location, if necessary (not all constructs).
+ /// Exprs: List of expressions on the construct itself, if necessary (not all
+ /// constructs).
+ /// RParenLoc: Location of the right paren, if it exists (not on all
+ /// constructs).
+ StmtResult ActOnEndStmtDirective(
+ OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc,
+ SourceLocation LParenLoc, SourceLocation MiscLoc, ArrayRef<Expr *> Exprs,
+ SourceLocation RParenLoc, SourceLocation EndLoc,
+ ArrayRef<OpenACCClause *> Clauses, StmtResult AssocStmt);
/// Called after the directive has been completely parsed, including the
/// declaration group or associated statement.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index da0e5fd..57e27c3 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -2021,6 +2021,7 @@ enum StmtCode {
STMT_OPENACC_ENTER_DATA_CONSTRUCT,
STMT_OPENACC_EXIT_DATA_CONSTRUCT,
STMT_OPENACC_HOST_DATA_CONSTRUCT,
+ STMT_OPENACC_WAIT_CONSTRUCT,
// HLSL Constructs
EXPR_HLSL_OUT_ARG,
diff --git a/clang/lib/APINotes/APINotesReader.cpp b/clang/lib/APINotes/APINotesReader.cpp
index fa06dff..646eabd 100644
--- a/clang/lib/APINotes/APINotesReader.cpp
+++ b/clang/lib/APINotes/APINotesReader.cpp
@@ -2045,7 +2045,12 @@ APINotesReader::VersionedInfo<T>::VersionedInfo(
Results.begin(), Results.end(),
[](const std::pair<llvm::VersionTuple, T> &left,
const std::pair<llvm::VersionTuple, T> &right) -> bool {
- assert(left.first != right.first && "two entries for the same version");
+ // The comparison function should be reflective, and with expensive
+ // checks we can get callbacks basically checking that lambda(a,a) is
+ // false. We could still check that we do not find equal elements when
+ // left!=right.
+ assert((&left == &right || left.first != right.first) &&
+ "two entries for the same version");
return left.first < right.first;
}));
diff --git a/clang/lib/AST/StmtOpenACC.cpp b/clang/lib/AST/StmtOpenACC.cpp
index fb73dfb..6d9f267 100644
--- a/clang/lib/AST/StmtOpenACC.cpp
+++ b/clang/lib/AST/StmtOpenACC.cpp
@@ -196,3 +196,32 @@ OpenACCHostDataConstruct *OpenACCHostDataConstruct::Create(
Clauses, StructuredBlock);
return Inst;
}
+
+OpenACCWaitConstruct *OpenACCWaitConstruct::CreateEmpty(const ASTContext &C,
+ unsigned NumExprs,
+ unsigned NumClauses) {
+ void *Mem = C.Allocate(
+ OpenACCWaitConstruct::totalSizeToAlloc<Expr *, OpenACCClause *>(
+ NumExprs, NumClauses));
+
+ auto *Inst = new (Mem) OpenACCWaitConstruct(NumExprs, NumClauses);
+ return Inst;
+}
+
+OpenACCWaitConstruct *OpenACCWaitConstruct::Create(
+ const ASTContext &C, SourceLocation Start, SourceLocation DirectiveLoc,
+ SourceLocation LParenLoc, Expr *DevNumExpr, SourceLocation QueuesLoc,
+ ArrayRef<Expr *> QueueIdExprs, SourceLocation RParenLoc, SourceLocation End,
+ ArrayRef<const OpenACCClause *> Clauses) {
+
+ assert(llvm::all_of(QueueIdExprs, [](Expr *E) { return E != nullptr; }));
+
+ void *Mem = C.Allocate(
+ OpenACCWaitConstruct::totalSizeToAlloc<Expr *, OpenACCClause *>(
+ QueueIdExprs.size() + 1, Clauses.size()));
+
+ auto *Inst = new (Mem)
+ OpenACCWaitConstruct(Start, DirectiveLoc, LParenLoc, DevNumExpr,
+ QueuesLoc, QueueIdExprs, RParenLoc, End, Clauses);
+ return Inst;
+}
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 488419a..ecc9b6e3 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1238,6 +1238,34 @@ void StmtPrinter::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) {
PrintStmt(S->getStructuredBlock());
}
+void StmtPrinter::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) {
+ Indent() << "#pragma acc wait";
+ if (!S->getLParenLoc().isInvalid()) {
+ OS << "(";
+ if (S->hasDevNumExpr()) {
+ OS << "devnum: ";
+ S->getDevNumExpr()->printPretty(OS, nullptr, Policy);
+ OS << " : ";
+ }
+
+ if (S->hasQueuesTag())
+ OS << "queues: ";
+
+ llvm::interleaveComma(S->getQueueIdExprs(), OS, [&](const Expr *E) {
+ E->printPretty(OS, nullptr, Policy);
+ });
+
+ OS << ")";
+ }
+
+ if (!S->clauses().empty()) {
+ OS << ' ';
+ OpenACCClausePrinter Printer(OS, Policy);
+ Printer.VisitClauseList(S->clauses());
+ }
+ OS << '\n';
+}
+
//===----------------------------------------------------------------------===//
// Expr printing methods.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index 1fb2387..fccd97d 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -2743,6 +2743,14 @@ void StmtProfiler::VisitOpenACCHostDataConstruct(
P.VisitOpenACCClauseList(S->clauses());
}
+void StmtProfiler::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) {
+ // VisitStmt covers 'children', so the exprs inside of it are covered.
+ VisitStmt(S);
+
+ OpenACCClauseProfiler P{*this};
+ P.VisitOpenACCClauseList(S->clauses());
+}
+
void StmtProfiler::VisitHLSLOutArgExpr(const HLSLOutArgExpr *S) {
VisitStmt(S);
}
diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp
index b5af10d..7cdffbe 100644
--- a/clang/lib/AST/TextNodeDumper.cpp
+++ b/clang/lib/AST/TextNodeDumper.cpp
@@ -2960,6 +2960,10 @@ void TextNodeDumper::VisitOpenACCHostDataConstruct(
OS << " " << S->getDirectiveKind();
}
+void TextNodeDumper::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *S) {
+ OS << " " << S->getDirectiveKind();
+}
+
void TextNodeDumper::VisitEmbedExpr(const EmbedExpr *S) {
AddChild("begin", [=] { OS << S->getStartingElementPos(); });
AddChild("number of elements", [=] { OS << S->getDataElementCount(); });
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6c7a594..6c604f4 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -470,6 +470,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
case Stmt::OpenACCHostDataConstructClass:
EmitOpenACCHostDataConstruct(cast<OpenACCHostDataConstruct>(*S));
break;
+ case Stmt::OpenACCWaitConstructClass:
+ EmitOpenACCWaitConstruct(cast<OpenACCWaitConstruct>(*S));
+ break;
}
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 092d553..847999c 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4118,6 +4118,11 @@ public:
EmitStmt(S.getStructuredBlock());
}
+ void EmitOpenACCWaitConstruct(const OpenACCWaitConstruct &S) {
+ // TODO OpenACC: Implement this. It is currently implemented as a 'no-op',
+ // but in the future we will implement some sort of IR.
+ }
+
//===--------------------------------------------------------------------===//
// LValue Expression Emission
//===--------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp
index 7248abe..cda218e 100644
--- a/clang/lib/CodeGen/CoverageMappingGen.cpp
+++ b/clang/lib/CodeGen/CoverageMappingGen.cpp
@@ -1146,12 +1146,15 @@ struct CounterCoverageMappingBuilder
/// Create a Branch Region around a SwitchCase for code coverage
/// and add it to the function's SourceRegions.
- void createSwitchCaseRegion(const SwitchCase *SC, Counter TrueCnt) {
+ /// Returns Counter that corresponds to SC.
+ Counter createSwitchCaseRegion(const SwitchCase *SC, Counter ParentCount) {
// Push region onto RegionStack but immediately pop it (which adds it to
// the function's SourceRegions) because it doesn't apply to any other
// source other than the SwitchCase.
+ Counter TrueCnt = getRegionCounter(SC);
popRegions(pushRegion(TrueCnt, getStart(SC), SC->getColonLoc(),
- Counter::getZero()));
+ subtractCounters(ParentCount, TrueCnt)));
+ return TrueCnt;
}
/// Check whether a region with bounds \c StartLoc and \c EndLoc
@@ -1863,16 +1866,22 @@ struct CounterCoverageMappingBuilder
const SwitchCase *Case = S->getSwitchCaseList();
for (; Case; Case = Case->getNextSwitchCase()) {
HasDefaultCase = HasDefaultCase || isa<DefaultStmt>(Case);
- auto CaseCount = getRegionCounter(Case);
+ auto CaseCount = createSwitchCaseRegion(Case, ParentCount);
CaseCountSum = addCounters(CaseCountSum, CaseCount, /*Simplify=*/false);
- createSwitchCaseRegion(Case, CaseCount);
}
// If no explicit default case exists, create a branch region to represent
// the hidden branch, which will be added later by the CodeGen. This region
// will be associated with the switch statement's condition.
if (!HasDefaultCase) {
- Counter DefaultCount = subtractCounters(ParentCount, CaseCountSum);
- createBranchRegion(S->getCond(), Counter::getZero(), DefaultCount);
+ // Simplify is skipped while building the counters above: it can get
+ // really slow on top of switches with thousands of cases. Instead,
+ // trigger simplification by adding zero to the last counter.
+ CaseCountSum =
+ addCounters(CaseCountSum, Counter::getZero(), /*Simplify=*/true);
+
+ // This is considered as the False count on SwitchStmt.
+ Counter SwitchFalse = subtractCounters(ParentCount, CaseCountSum);
+ createBranchRegion(S->getCond(), CaseCountSum, SwitchFalse);
}
}
diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 9ffdc04..554b55f 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -461,9 +461,8 @@ bool ContinuationIndenter::mustBreak(const LineState &State) {
getColumnLimit(State) ||
CurrentState.BreakBeforeParameter) &&
(!Current.isTrailingComment() || Current.NewlinesBefore > 0) &&
- (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All ||
- Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon ||
- Style.ColumnLimit != 0)) {
+ (Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon ||
+ Style.ColumnLimit > 0 || Current.NewlinesBefore > 0)) {
return true;
}
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 05c86db..6a8caa2 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -492,7 +492,8 @@ private:
ProbablyFunctionType && CurrentToken->Next &&
(CurrentToken->Next->is(tok::l_paren) ||
(CurrentToken->Next->is(tok::l_square) &&
- Line.MustBeDeclaration))) {
+ (Line.MustBeDeclaration ||
+ PrevNonComment->isTypeName(LangOpts))))) {
OpeningParen.setType(OpeningParen.Next->is(tok::caret)
? TT_ObjCBlockLParen
: TT_FunctionTypeLParen);
@@ -2403,7 +2404,8 @@ private:
// not auto operator->() -> xxx;
Current.setType(TT_TrailingReturnArrow);
} else if (Current.is(tok::arrow) && Current.Previous &&
- Current.Previous->is(tok::r_brace)) {
+ Current.Previous->is(tok::r_brace) &&
+ Current.Previous->is(BK_Block)) {
// Concept implicit conversion constraint needs to be treated like
// a trailing return type ... } -> <type>.
Current.setType(TT_TrailingReturnArrow);
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index de7e261..654148a 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -570,8 +570,9 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
Keywords.kw_as));
ProbablyBracedList =
- ProbablyBracedList || (IsCpp && (PrevTok->Tok.isLiteral() ||
- NextTok->is(tok::l_paren)));
+ ProbablyBracedList ||
+ (IsCpp && (PrevTok->Tok.isLiteral() ||
+ NextTok->isOneOf(tok::l_paren, tok::arrow)));
// If there is a comma, semicolon or right paren after the closing
// brace, we assume this is a braced initializer list.
diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp
index af175a4..5da34a2 100644
--- a/clang/lib/Parse/ParseOpenACC.cpp
+++ b/clang/lib/Parse/ParseOpenACC.cpp
@@ -573,6 +573,7 @@ bool doesDirectiveHaveAssociatedStmt(OpenACCDirectiveKind DirKind) {
default:
case OpenACCDirectiveKind::EnterData:
case OpenACCDirectiveKind::ExitData:
+ case OpenACCDirectiveKind::Wait:
return false;
case OpenACCDirectiveKind::Parallel:
case OpenACCDirectiveKind::Serial:
@@ -604,6 +605,7 @@ unsigned getOpenACCScopeFlags(OpenACCDirectiveKind DirKind) {
case OpenACCDirectiveKind::EnterData:
case OpenACCDirectiveKind::ExitData:
case OpenACCDirectiveKind::HostData:
+ case OpenACCDirectiveKind::Wait:
return 0;
case OpenACCDirectiveKind::Invalid:
llvm_unreachable("Shouldn't be creating a scope for an invalid construct");
@@ -1288,7 +1290,8 @@ Parser::ParseOpenACCWaitArgument(SourceLocation Loc, bool IsDirective) {
return Result;
}
- Result.QueueIdExprs.push_back(Res.first.get());
+ if (Res.first.isUsable())
+ Result.QueueIdExprs.push_back(Res.first.get());
}
return Result;
@@ -1422,6 +1425,7 @@ Parser::ParseOpenACCDirective() {
SourceLocation StartLoc = ConsumeAnnotationToken();
SourceLocation DirLoc = getCurToken().getLocation();
OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this);
+ Parser::OpenACCWaitParseInfo WaitInfo;
getActions().OpenACC().ActOnConstruct(DirKind, DirLoc);
@@ -1462,7 +1466,8 @@ Parser::ParseOpenACCDirective() {
break;
case OpenACCDirectiveKind::Wait:
// OpenACC has an optional paren-wrapped 'wait-argument'.
- if (ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true).Failed)
+ WaitInfo = ParseOpenACCWaitArgument(DirLoc, /*IsDirective=*/true);
+ if (WaitInfo.Failed)
T.skipToEnd();
else
T.consumeClose();
@@ -1476,8 +1481,14 @@ Parser::ParseOpenACCDirective() {
}
// Parses the list of clauses, if present, plus set up return value.
- OpenACCDirectiveParseInfo ParseInfo{DirKind, StartLoc, DirLoc,
- SourceLocation{},
+ OpenACCDirectiveParseInfo ParseInfo{DirKind,
+ StartLoc,
+ DirLoc,
+ T.getOpenLocation(),
+ T.getCloseLocation(),
+ /*EndLoc=*/SourceLocation{},
+ WaitInfo.QueuesLoc,
+ WaitInfo.getAllExprs(),
ParseOpenACCClauseList(DirKind)};
assert(Tok.is(tok::annot_pragma_openacc_end) &&
@@ -1529,6 +1540,7 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() {
}
return getActions().OpenACC().ActOnEndStmtDirective(
- DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.EndLoc,
+ DirInfo.DirKind, DirInfo.StartLoc, DirInfo.DirLoc, DirInfo.LParenLoc,
+ DirInfo.MiscLoc, DirInfo.Exprs, DirInfo.RParenLoc, DirInfo.EndLoc,
DirInfo.Clauses, AssocStmt);
}
diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index 2be6af2..505cc5e 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1398,6 +1398,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
case Expr::HLSLOutArgExprClass:
case Stmt::OpenACCEnterDataConstructClass:
case Stmt::OpenACCExitDataConstructClass:
+ case Stmt::OpenACCWaitConstructClass:
// These expressions can never throw.
return CT_Cannot;
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 11c1835..aa9097b 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -41,6 +41,7 @@ bool diagnoseConstructAppertainment(SemaOpenACC &S, OpenACCDirectiveKind K,
case OpenACCDirectiveKind::EnterData:
case OpenACCDirectiveKind::ExitData:
case OpenACCDirectiveKind::HostData:
+ case OpenACCDirectiveKind::Wait:
if (!IsStmt)
return S.Diag(StartLoc, diag::err_acc_construct_appertainment) << K;
break;
@@ -566,6 +567,16 @@ bool checkValidAfterDeviceType(
return true;
}
+// A temporary function that helps implement the 'not implemented' check at the
+// top of each clause checking function. This should only be used in conjunction
+// with the one being currently implemented/only updated after the entire
+// construct has been implemented.
+bool isDirectiveKindImplemented(OpenACCDirectiveKind DK) {
+ return isOpenACCComputeDirectiveKind(DK) ||
+ isOpenACCCombinedDirectiveKind(DK) || isOpenACCDataDirectiveKind(DK) ||
+ DK == OpenACCDirectiveKind::Loop || DK == OpenACCDirectiveKind::Wait;
+}
+
class SemaOpenACCClauseVisitor {
SemaOpenACC &SemaRef;
ASTContext &Ctx;
@@ -680,9 +691,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitIfClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// constructs that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// There is no prose in the standard that says duplicates aren't allowed,
@@ -717,8 +726,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitSelfClause(
// Restrictions only properly implemented on 'compute' constructs, and
// 'compute' constructs are the only construct that can do anything with
// this yet, so skip/treat as unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// TODO OpenACC: When we implement this for 'update', this takes a
@@ -915,9 +923,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitAsyncClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// There is no prose in the standard that says duplicates aren't allowed,
@@ -973,9 +979,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitPresentClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// ActOnVar ensured that everything is a valid variable reference, so there
// really isn't anything to do here. GCC does some duplicate-finding, though
@@ -992,9 +996,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// ActOnVar ensured that everything is a valid variable reference, so there
// really isn't anything to do here. GCC does some duplicate-finding, though
@@ -1011,9 +1013,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyInClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// ActOnVar ensured that everything is a valid variable reference, so there
// really isn't anything to do here. GCC does some duplicate-finding, though
@@ -1030,9 +1030,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitCopyOutClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// ActOnVar ensured that everything is a valid variable reference, so there
// really isn't anything to do here. GCC does some duplicate-finding, though
@@ -1109,9 +1107,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDevicePtrClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// ActOnVar ensured that everything is a valid variable reference, but we
@@ -1134,9 +1130,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWaitClause(
// constructs, and 'compute'/'combined'/'data' constructs are the only
// construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()) &&
- !isOpenACCDataDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
return OpenACCWaitClause::Create(
@@ -1150,10 +1144,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitDeviceTypeClause(
// 'loop' constructs, and 'compute'/'combined'/'data'/'loop' constructs are
// the only construct that can do anything with this yet, so skip/treat as
// unimplemented in this case.
- if (!isOpenACCComputeDirectiveKind(Clause.getDirectiveKind()) &&
- Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop &&
- Clause.getDirectiveKind() != OpenACCDirectiveKind::Data &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// TODO OpenACC: Once we get enough of the CodeGen implemented that we have
@@ -1347,8 +1338,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitVectorClause(
// Restrictions only properly implemented on 'loop'/'combined' constructs, and
// it is the only construct that can do anything with this, so skip/treat as
// unimplemented for the routine constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
Expr *IntExpr =
@@ -1446,8 +1436,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitWorkerClause(
// Restrictions only properly implemented on 'loop'/'combined' constructs, and
// it is the only construct that can do anything with this, so skip/treat as
// unimplemented for the routine constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
Expr *IntExpr =
@@ -1559,8 +1548,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitGangClause(
// Restrictions only properly implemented on 'loop' constructs, and it is
// the only construct that can do anything with this, so skip/treat as
// unimplemented for the combined constructs.
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::Loop &&
- !isOpenACCCombinedDirectiveKind(Clause.getDirectiveKind()))
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// OpenACC 3.3 Section 2.9.11: A reduction clause may not appear on a loop
@@ -1691,7 +1679,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitFinalizeClause(
OpenACCClause *SemaOpenACCClauseVisitor::VisitIfPresentClause(
SemaOpenACC::OpenACCParsedClause &Clause) {
- if (Clause.getDirectiveKind() != OpenACCDirectiveKind::HostData)
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// There isn't anything to do here, this is only valid on one construct, and
// has no associated rules.
@@ -1704,7 +1692,7 @@ OpenACCClause *SemaOpenACCClauseVisitor::VisitSeqClause(
// Restrictions only properly implemented on 'loop' constructs and combined ,
// and it is the only construct that can do anything with this, so skip/treat
// as unimplemented for the routine constructs.
- if (Clause.getDirectiveKind() == OpenACCDirectiveKind::Routine)
+ if (!isDirectiveKindImplemented(Clause.getDirectiveKind()))
return isNotImplemented();
// OpenACC 3.3 2.9:
@@ -1879,6 +1867,7 @@ bool PreserveLoopRAIIDepthInAssociatedStmtRAII(OpenACCDirectiveKind DK) {
return true;
case OpenACCDirectiveKind::EnterData:
case OpenACCDirectiveKind::ExitData:
+ case OpenACCDirectiveKind::Wait:
llvm_unreachable("Doesn't have an associated stmt");
default:
case OpenACCDirectiveKind::Invalid:
@@ -2308,6 +2297,10 @@ void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K,
// Nothing to do here, there is no real legalization that needs to happen
// here as these constructs do not take any arguments.
break;
+ case OpenACCDirectiveKind::Wait:
+ // Nothing really to do here, the arguments to the 'wait' should have
+ // already been handled by the time we get here.
+ break;
default:
Diag(DirLoc, diag::warn_acc_construct_unimplemented) << K;
break;
@@ -3637,12 +3630,11 @@ bool SemaOpenACC::ActOnStartStmtDirective(
return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true);
}
-StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K,
- SourceLocation StartLoc,
- SourceLocation DirLoc,
- SourceLocation EndLoc,
- ArrayRef<OpenACCClause *> Clauses,
- StmtResult AssocStmt) {
+StmtResult SemaOpenACC::ActOnEndStmtDirective(
+ OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation DirLoc,
+ SourceLocation LParenLoc, SourceLocation MiscLoc, ArrayRef<Expr *> Exprs,
+ SourceLocation RParenLoc, SourceLocation EndLoc,
+ ArrayRef<OpenACCClause *> Clauses, StmtResult AssocStmt) {
switch (K) {
default:
return StmtEmpty();
@@ -3685,6 +3677,11 @@ StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K,
getASTContext(), StartLoc, DirLoc, EndLoc, Clauses,
AssocStmt.isUsable() ? AssocStmt.get() : nullptr);
}
+ case OpenACCDirectiveKind::Wait: {
+ return OpenACCWaitConstruct::Create(
+ getASTContext(), StartLoc, DirLoc, LParenLoc, Exprs.front(), MiscLoc,
+ Exprs.drop_front(), RParenLoc, EndLoc, Clauses);
+ }
}
llvm_unreachable("Unhandled case in directive handling?");
}
@@ -3697,6 +3694,7 @@ StmtResult SemaOpenACC::ActOnAssociatedStmt(
llvm_unreachable("Unimplemented associated statement application");
case OpenACCDirectiveKind::EnterData:
case OpenACCDirectiveKind::ExitData:
+ case OpenACCDirectiveKind::Wait:
llvm_unreachable(
"these don't have associated statements, so shouldn't get here");
case OpenACCDirectiveKind::Parallel:
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 04167e7..c097465 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -4087,8 +4087,9 @@ public:
SourceLocation EndLoc,
ArrayRef<OpenACCClause *> Clauses,
StmtResult StrBlock) {
- return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc,
- EndLoc, Clauses, StrBlock);
+ return getSema().OpenACC().ActOnEndStmtDirective(
+ K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {},
+ SourceLocation{}, EndLoc, Clauses, StrBlock);
}
StmtResult RebuildOpenACCLoopConstruct(SourceLocation BeginLoc,
@@ -4097,7 +4098,8 @@ public:
ArrayRef<OpenACCClause *> Clauses,
StmtResult Loop) {
return getSema().OpenACC().ActOnEndStmtDirective(
- OpenACCDirectiveKind::Loop, BeginLoc, DirLoc, EndLoc, Clauses, Loop);
+ OpenACCDirectiveKind::Loop, BeginLoc, DirLoc, SourceLocation{},
+ SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, Loop);
}
StmtResult RebuildOpenACCCombinedConstruct(OpenACCDirectiveKind K,
@@ -4106,8 +4108,9 @@ public:
SourceLocation EndLoc,
ArrayRef<OpenACCClause *> Clauses,
StmtResult Loop) {
- return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, DirLoc,
- EndLoc, Clauses, Loop);
+ return getSema().OpenACC().ActOnEndStmtDirective(
+ K, BeginLoc, DirLoc, SourceLocation{}, SourceLocation{}, {},
+ SourceLocation{}, EndLoc, Clauses, Loop);
}
StmtResult RebuildOpenACCDataConstruct(SourceLocation BeginLoc,
@@ -4115,9 +4118,9 @@ public:
SourceLocation EndLoc,
ArrayRef<OpenACCClause *> Clauses,
StmtResult StrBlock) {
- return getSema().OpenACC().ActOnEndStmtDirective(OpenACCDirectiveKind::Data,
- BeginLoc, DirLoc, EndLoc,
- Clauses, StrBlock);
+ return getSema().OpenACC().ActOnEndStmtDirective(
+ OpenACCDirectiveKind::Data, BeginLoc, DirLoc, SourceLocation{},
+ SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock);
}
StmtResult
@@ -4125,7 +4128,8 @@ public:
SourceLocation DirLoc, SourceLocation EndLoc,
ArrayRef<OpenACCClause *> Clauses) {
return getSema().OpenACC().ActOnEndStmtDirective(
- OpenACCDirectiveKind::EnterData, BeginLoc, DirLoc, EndLoc, Clauses, {});
+ OpenACCDirectiveKind::EnterData, BeginLoc, DirLoc, SourceLocation{},
+ SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {});
}
StmtResult
@@ -4133,7 +4137,8 @@ public:
SourceLocation DirLoc, SourceLocation EndLoc,
ArrayRef<OpenACCClause *> Clauses) {
return getSema().OpenACC().ActOnEndStmtDirective(
- OpenACCDirectiveKind::ExitData, BeginLoc, DirLoc, EndLoc, Clauses, {});
+ OpenACCDirectiveKind::ExitData, BeginLoc, DirLoc, SourceLocation{},
+ SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, {});
}
StmtResult RebuildOpenACCHostDataConstruct(SourceLocation BeginLoc,
@@ -4142,8 +4147,21 @@ public:
ArrayRef<OpenACCClause *> Clauses,
StmtResult StrBlock) {
return getSema().OpenACC().ActOnEndStmtDirective(
- OpenACCDirectiveKind::HostData, BeginLoc, DirLoc, EndLoc, Clauses,
- StrBlock);
+ OpenACCDirectiveKind::HostData, BeginLoc, DirLoc, SourceLocation{},
+ SourceLocation{}, {}, SourceLocation{}, EndLoc, Clauses, StrBlock);
+ }
+
+ StmtResult RebuildOpenACCWaitConstruct(
+ SourceLocation BeginLoc, SourceLocation DirLoc, SourceLocation LParenLoc,
+ Expr *DevNumExpr, SourceLocation QueuesLoc, ArrayRef<Expr *> QueueIdExprs,
+ SourceLocation RParenLoc, SourceLocation EndLoc,
+ ArrayRef<OpenACCClause *> Clauses) {
+ llvm::SmallVector<Expr *> Exprs;
+ Exprs.push_back(DevNumExpr);
+ Exprs.insert(Exprs.end(), QueueIdExprs.begin(), QueueIdExprs.end());
+ return getSema().OpenACC().ActOnEndStmtDirective(
+ OpenACCDirectiveKind::Wait, BeginLoc, DirLoc, LParenLoc, QueuesLoc,
+ Exprs, RParenLoc, EndLoc, Clauses, {});
}
ExprResult RebuildOpenACCAsteriskSizeExpr(SourceLocation AsteriskLoc) {
@@ -12331,6 +12349,50 @@ StmtResult TreeTransform<Derived>::TransformOpenACCHostDataConstruct(
}
template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOpenACCWaitConstruct(OpenACCWaitConstruct *C) {
+ getSema().OpenACC().ActOnConstruct(C->getDirectiveKind(), C->getBeginLoc());
+
+ ExprResult DevNumExpr;
+ if (C->hasDevNumExpr()) {
+ DevNumExpr = getDerived().TransformExpr(C->getDevNumExpr());
+
+ if (DevNumExpr.isUsable())
+ DevNumExpr = getSema().OpenACC().ActOnIntExpr(
+ OpenACCDirectiveKind::Wait, OpenACCClauseKind::Invalid,
+ C->getBeginLoc(), DevNumExpr.get());
+ }
+
+ llvm::SmallVector<Expr *> QueueIdExprs;
+
+ for (Expr *QE : C->getQueueIdExprs()) {
+ assert(QE && "Null queue id expr?");
+ ExprResult NewEQ = getDerived().TransformExpr(QE);
+
+ if (!NewEQ.isUsable())
+ break;
+ NewEQ = getSema().OpenACC().ActOnIntExpr(OpenACCDirectiveKind::Wait,
+ OpenACCClauseKind::Invalid,
+ C->getBeginLoc(), NewEQ.get());
+ if (NewEQ.isUsable())
+ QueueIdExprs.push_back(NewEQ.get());
+ }
+
+ llvm::SmallVector<OpenACCClause *> TransformedClauses =
+ getDerived().TransformOpenACCClauseList(C->getDirectiveKind(),
+ C->clauses());
+
+ if (getSema().OpenACC().ActOnStartStmtDirective(
+ C->getDirectiveKind(), C->getBeginLoc(), TransformedClauses))
+ return StmtError();
+
+ return getDerived().RebuildOpenACCWaitConstruct(
+ C->getBeginLoc(), C->getDirectiveLoc(), C->getLParenLoc(),
+ DevNumExpr.isUsable() ? DevNumExpr.get() : nullptr, C->getQueuesLoc(),
+ QueueIdExprs, C->getRParenLoc(), C->getEndLoc(), TransformedClauses);
+}
+
+template <typename Derived>
ExprResult TreeTransform<Derived>::TransformOpenACCAsteriskSizeExpr(
OpenACCAsteriskSizeExpr *E) {
if (getDerived().AlwaysRebuild())
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 21ad6c5..8fe0412 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2870,6 +2870,22 @@ void ASTStmtReader::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) {
VisitOpenACCAssociatedStmtConstruct(S);
}
+void ASTStmtReader::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) {
+ VisitStmt(S);
+ // Consume the count of Expressions.
+ (void)Record.readInt();
+ VisitOpenACCConstructStmt(S);
+ S->LParenLoc = Record.readSourceLocation();
+ S->RParenLoc = Record.readSourceLocation();
+ S->QueuesLoc = Record.readSourceLocation();
+
+ for (unsigned I = 0; I < S->NumExprs; ++I) {
+ S->getExprPtr()[I] = cast_if_present<Expr>(Record.readSubStmt());
+ assert((I == 0 || S->getExprPtr()[I] != nullptr) &&
+ "Only first expression should be null");
+ }
+}
+
//===----------------------------------------------------------------------===//
// HLSL Constructs/Directives.
//===----------------------------------------------------------------------===//
@@ -4365,6 +4381,12 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
S = OpenACCHostDataConstruct::CreateEmpty(Context, NumClauses);
break;
}
+ case STMT_OPENACC_WAIT_CONSTRUCT: {
+ unsigned NumExprs = Record[ASTStmtReader::NumStmtFields];
+ unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+ S = OpenACCWaitConstruct::CreateEmpty(Context, NumExprs, NumClauses);
+ break;
+ }
case EXPR_REQUIRES: {
unsigned numLocalParameters = Record[ASTStmtReader::NumExprFields];
unsigned numRequirement = Record[ASTStmtReader::NumExprFields + 1];
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index e55cbe1..f13443d 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2951,6 +2951,20 @@ void ASTStmtWriter::VisitOpenACCHostDataConstruct(OpenACCHostDataConstruct *S) {
Code = serialization::STMT_OPENACC_HOST_DATA_CONSTRUCT;
}
+void ASTStmtWriter::VisitOpenACCWaitConstruct(OpenACCWaitConstruct *S) {
+ VisitStmt(S);
+ Record.push_back(S->getExprs().size());
+ VisitOpenACCConstructStmt(S);
+ Record.AddSourceLocation(S->LParenLoc);
+ Record.AddSourceLocation(S->RParenLoc);
+ Record.AddSourceLocation(S->QueuesLoc);
+
+ for(Expr *E : S->getExprs())
+ Record.AddStmt(E);
+
+ Code = serialization::STMT_OPENACC_WAIT_CONSTRUCT;
+}
+
//===----------------------------------------------------------------------===//
// HLSL Constructs/Directives.
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index ae43c595..0a74a80 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1829,6 +1829,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
case Stmt::OpenACCEnterDataConstructClass:
case Stmt::OpenACCExitDataConstructClass:
case Stmt::OpenACCHostDataConstructClass:
+ case Stmt::OpenACCWaitConstructClass:
case Stmt::OMPUnrollDirectiveClass:
case Stmt::OMPMetaDirectiveClass:
case Stmt::HLSLOutArgExprClass: {
diff --git a/clang/test/AST/ast-print-openacc-wait-construct.cpp b/clang/test/AST/ast-print-openacc-wait-construct.cpp
new file mode 100644
index 0000000..3535459
--- /dev/null
+++ b/clang/test/AST/ast-print-openacc-wait-construct.cpp
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -fopenacc -ast-print %s -o - | FileCheck %s
+
+void uses() {
+ int *iPtr;
+ int I;
+ float array[5];
+
+// CHECK: #pragma acc wait() if(I == array[I])
+#pragma acc wait() if(I == array[I])
+
+// CHECK: #pragma acc wait(*iPtr, I) async
+#pragma acc wait(*iPtr, I) async
+
+// CHECK: #pragma acc wait(queues: *iPtr, I) async(*iPtr)
+#pragma acc wait(queues:*iPtr, I) async(*iPtr)
+
+// CHECK: #pragma acc wait(devnum: I : *iPtr, I) async(I)
+#pragma acc wait(devnum:I:*iPtr, I) async(I)
+
+// CHECK: #pragma acc wait(devnum: I : queues: *iPtr, I) if(I == array[I]) async(I)
+#pragma acc wait(devnum:I:queues:*iPtr, I) if(I == array[I]) async(I)
+}
diff --git a/clang/test/CoverageMapping/switch.cpp b/clang/test/CoverageMapping/switch.cpp
index a1fee64..db4cddb 100644
--- a/clang/test/CoverageMapping/switch.cpp
+++ b/clang/test/CoverageMapping/switch.cpp
@@ -2,13 +2,13 @@
// CHECK: foo
void foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+11]]:2 = #0
- switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = 0, ((#0 - #2) - #3)
+ switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#2 + #3), ((#0 - #2) - #3)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+5]]:10 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:11 = #2
- return; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0
+ return; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:12 -> [[@LINE+1]]:3 = 0
case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #3
- break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0
+ break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3)
} // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+1]]:3 = #1
int x = 0; // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:2 = #1
@@ -28,14 +28,14 @@ void bar(int i) { // CHECK-NEXT: File 0, [[@LINE]]:17 -> [[@LINE+21]]:2 = #0
nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = 0, #2
// CHECK-NEXT: File 0, [[@LINE-1]]:5 -> [[@LINE-1]]:10 = 0
switch (i) // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+11]]:2 = #3
- case 1: // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = 0, (#3 - #5)
+ case 1: // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:11 -> [[@LINE-1]]:12 = #5, (#3 - #5)
// CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:10 = #5
- nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #5, 0
+ nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #5, (#3 - #5)
// CHECK-NEXT: File 0, [[@LINE+1]]:3 -> [[@LINE+7]]:2 = #4
- switch (i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = 0, (#4 - #7)
+ switch (i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = #7, (#4 - #7)
nop(); // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+2]]:10 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #7
- nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, 0
+ nop(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, (#4 - #7)
}
nop(); // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:2 = #6
}
@@ -53,35 +53,35 @@ int main() { // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE+39]]:2 = #0
int i = 0;
switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+8]]:10 = 0
case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #2
- i = 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0
+ i = 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2)
break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = #3
- i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0
+ i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3)
break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0
default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = #4
- break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #4, 0
+ break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #4, (#0 - #4)
} // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+1]]:3 = #1
switch(i) { // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+27]]:2 = #1
case 0: // CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+7]]:10 = 0
i = 1; // CHECK-NEXT: File 0, [[@LINE-1]]:3 -> [[@LINE+1]]:10 = #6
- break; // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #6, 0
+ break; // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:3 -> [[@LINE-2]]:9 = #6, (#1 - #6)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:11 -> [[@LINE+1]]:3 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:10 = #7
- i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, 0
+ i = 2; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #7, (#1 - #7)
default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:10 = (#7 + #8)
- break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #8, 0
+ break; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #8, (#1 - #8)
} // CHECK-NEXT: Gap,File 0, [[@LINE]]:4 -> [[@LINE+2]]:3 = #5
// CHECK-NEXT: File 0, [[@LINE+1]]:3 -> [[@LINE+17]]:2 = #5
- switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = 0, ((((#5 - #10) - #11) - #12) - #13)
+ switch(i) { // CHECK-NEXT: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (((#10 + #11) + #12) + #13), ((((#5 - #10) - #11) - #12) - #13)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+8]]:11 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+7]]:11 = #10
- // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #10, 0
+ // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #10, (#5 - #10)
case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+5]]:11 = (#10 + #11)
- i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #11, 0
+ i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #11, (#5 - #11)
case 3: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:11 = ((#10 + #11) + #12)
- // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #12, 0
+ // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #12, (#5 - #12)
case 4: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:11 = (((#10 + #11) + #12) + #13)
- i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #13, 0
+ i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #13, (#5 - #13)
}
foo(1); // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+3]]:11 = #9
@@ -95,10 +95,10 @@ int pr44011(int i) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> {{.*}}:2 = #0
switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+6]]:13 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #2
- return 0; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0
+ return 0; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:3 = 0
default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13 = #3
- return 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #3, 0
+ return 1; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #3, (#0 - #3)
}
} // A region for counter #1 is missing due to the missing return.
@@ -106,17 +106,17 @@ int pr44011(int i) { // CHECK-NEXT: File 0, [[@LINE]]:20 -> {{.*}}:2 = #0
// FIXME: End location for "case 1" shouldn't point at the end of the switch.
// CHECK: fallthrough
int fallthrough(int i) { // CHECK-NEXT: File 0, [[@LINE]]:24 -> [[@LINE+14]]:2 = #0
- // CHECK-NEXT: Branch,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:11 = 0, ((((#0 - #2) - #3) - #4) - #5)
+ // CHECK-NEXT: Branch,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:11 = (((#2 + #3) + #4) + #5), ((((#0 - #2) - #3) - #4) - #5)
switch(i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:13 -> [[@LINE+10]]:10 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+9]]:10 = #2
- i = 23; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, 0
+ i = 23; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #2, (#0 - #2)
case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = (#2 + #3)
- i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0
+ i = 11; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3)
break; // CHECK-NEXT: Gap,File 0, [[@LINE]]:11 -> [[@LINE+1]]:3 = 0
case 3: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+4]]:10 = #4
- // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0
+ // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4)
case 4: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:10 = (#4 + #5)
- i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #5, 0
+ i = 99; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #5, (#0 - #5)
break;
}
}
@@ -126,12 +126,12 @@ void abort(void) __attribute((noreturn));
int noret(int x) { // CHECK-NEXT: File 0, [[@LINE]]:18 -> [[@LINE+11]]:2
switch (x) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+8]]:14 = 0
default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:12
- abort(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, 0
+ abort(); // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, (#0 - #2)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:13 -> [[@LINE+1]]:3 = 0
case 1: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:13
- return 5; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, 0
+ return 5; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #3, (#0 - #3)
// CHECK-NEXT: Gap,File 0, [[@LINE-1]]:14 -> [[@LINE+1]]:3 = 0
case 2: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+1]]:14
- return 10; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0
+ return 10; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4)
}
}
diff --git a/clang/test/CoverageMapping/switchmacro.c b/clang/test/CoverageMapping/switchmacro.c
index 0696e74..4c98cc7 100644
--- a/clang/test/CoverageMapping/switchmacro.c
+++ b/clang/test/CoverageMapping/switchmacro.c
@@ -6,7 +6,7 @@
int foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:16 -> {{[0-9]+}}:2 = #0
switch (i) { // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> {{[0-9]+}}:11 = 0
default: // CHECK-NEXT: File 0, [[@LINE]]:3 -> {{[0-9]+}}:11 = #2
- // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, 0
+ // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:10 = #2, (#0 - #2)
if (i == 1) // CHECK-NEXT: File 0, [[@LINE]]:9 -> [[@LINE]]:15 = #2
// CHECK-NEXT: Branch,File 0, [[@LINE-1]]:9 -> [[@LINE-1]]:15 = #3, (#2 - #3)
return 0; // CHECK: File 0, [[@LINE]]:7 -> [[@LINE]]:15 = #3
@@ -15,7 +15,7 @@ int foo(int i) { // CHECK-NEXT: File 0, [[@LINE]]:16 -> {{[0-9]+}}:2 = #0
// CHECK-NEXT: File 0, [[@LINE+1]]:8 -> {{[0-9]+}}:11 = (#2 - #3)
FOO(1);
case 0: // CHECK-NEXT: File 0, [[@LINE]]:3 -> [[@LINE+2]]:13 = ((#2 + #4) - #3)
- // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, 0
+ // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:3 -> [[@LINE-1]]:9 = #4, (#0 - #4)
return 2; // CHECK-NEXT: Gap,File 0, [[@LINE]]:14 -> [[@LINE+4]]:3 = 0
// CHECK-NEXT: Expansion,File 0, [[@LINE+2]]:3 -> [[@LINE+2]]:6 = 0
diff --git a/clang/test/ParserOpenACC/parse-wait-construct.c b/clang/test/ParserOpenACC/parse-wait-construct.c
index 8f7ea8e..17b7ecb 100644
--- a/clang/test/ParserOpenACC/parse-wait-construct.c
+++ b/clang/test/ParserOpenACC/parse-wait-construct.c
@@ -3,171 +3,135 @@
void func() {
int i, j;
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
#pragma acc wait
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait clause-list
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
#pragma acc wait ()
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait () clause-list
- // expected-error@+4{{expected expression}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{expected expression}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (devnum:
- // expected-error@+2{{expected expression}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{expected expression}}
#pragma acc wait (devnum:)
- // expected-error@+3{{expected expression}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected expression}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait (devnum:) clause-list
- // expected-error@+4{{expected ':'}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{expected ':'}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (devnum: i + j
- // expected-error@+2{{expected ':'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{expected ':'}}
#pragma acc wait (devnum: i + j)
- // expected-error@+3{{expected ':'}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected ':'}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait (devnum: i + j) clause-list
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (queues:
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
#pragma acc wait (queues:)
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait (queues:) clause-list
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (devnum: i + j:queues:
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
#pragma acc wait (devnum: i + j:queues:)
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait (devnum: i + j:queues:) clause-list
- // expected-error@+5{{use of undeclared identifier 'devnum'}}
- // expected-error@+4{{expected ','}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+4{{use of undeclared identifier 'devnum'}}
+ // expected-error@+3{{expected ','}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait (queues:devnum: i + j
- // expected-error@+3{{use of undeclared identifier 'devnum'}}
- // expected-error@+2{{expected ','}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{use of undeclared identifier 'devnum'}}
+ // expected-error@+1{{expected ','}}
#pragma acc wait (queues:devnum: i + j)
- // expected-error@+4{{use of undeclared identifier 'devnum'}}
- // expected-error@+3{{expected ','}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{use of undeclared identifier 'devnum'}}
+ // expected-error@+2{{expected ','}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait (queues:devnum: i + j) clause-list
- // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(i, j, 1+1, 3.3
- // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
#pragma acc wait(i, j, 1+1, 3.3)
- // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait(i, j, 1+1, 3.3) clause-list
- // expected-error@+4{{expected expression}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{expected expression}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(,
- // expected-error@+2{{expected expression}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{expected expression}}
#pragma acc wait(,)
- // expected-error@+3{{expected expression}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{expected expression}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait(,) clause-list
- // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(queues:i, j, 1+1, 3.3
- // expected-error@+5{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+4{{expected expression}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+3{{expected expression}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(queues:i, j, 1+1, 3.3,
- // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
#pragma acc wait(queues:i, j, 1+1, 3.3)
- // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait(queues:i, j, 1+1, 3.3) clause-list
- // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(devnum:3:i, j, 1+1, 3.3
- // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
#pragma acc wait(devnum:3:i, j, 1+1, 3.3)
- // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait(devnum:3:i, j, 1+1, 3.3) clause-list
- // expected-error@+4{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+3{{expected ')'}}
- // expected-note@+2{{to match this '('}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+2{{expected ')'}}
+ // expected-note@+1{{to match this '('}}
#pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3
- // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
#pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3)
- // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
- // expected-error@+2{{invalid OpenACC clause 'clause'}}
- // expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
+ // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('double' invalid)}}
+ // expected-error@+1{{invalid OpenACC clause 'clause'}}
#pragma acc wait(devnum:3:queues:i, j, 1+1, 3.3) clause-list
}
diff --git a/clang/test/SemaOpenACC/combined-construct-default-clause.c b/clang/test/SemaOpenACC/combined-construct-default-clause.c
index 43c2883..2d77faa 100644
--- a/clang/test/SemaOpenACC/combined-construct-default-clause.c
+++ b/clang/test/SemaOpenACC/combined-construct-default-clause.c
@@ -35,7 +35,6 @@ void SingleOnly() {
#pragma acc loop default(none)
for(int i = 5; i < 10;++i);
- // expected-warning@+2{{OpenACC construct 'wait' not yet implemented}}
// expected-error@+1{{OpenACC 'default' clause is not valid on 'wait' directive}}
#pragma acc wait default(none)
while(0);
diff --git a/clang/test/SemaOpenACC/compute-construct-default-clause.c b/clang/test/SemaOpenACC/compute-construct-default-clause.c
index dfa5cd3..86a7587 100644
--- a/clang/test/SemaOpenACC/compute-construct-default-clause.c
+++ b/clang/test/SemaOpenACC/compute-construct-default-clause.c
@@ -35,7 +35,6 @@ void SingleOnly() {
#pragma acc loop default(none)
for(int i = 5; i < 10;++i);
- // expected-warning@+2{{OpenACC construct 'wait' not yet implemented}}
// expected-error@+1{{OpenACC 'default' clause is not valid on 'wait' directive}}
#pragma acc wait default(none)
while(0);
diff --git a/clang/test/SemaOpenACC/unimplemented-construct.c b/clang/test/SemaOpenACC/unimplemented-construct.c
index 3f4bc37..42737eb 100644
--- a/clang/test/SemaOpenACC/unimplemented-construct.c
+++ b/clang/test/SemaOpenACC/unimplemented-construct.c
@@ -4,8 +4,8 @@
#pragma acc routine
struct S {
-// expected-warning@+1{{OpenACC construct 'wait' not yet implemented, pragma ignored}}
-#pragma acc wait
+// expected-warning@+1{{OpenACC construct 'set' not yet implemented, pragma ignored}}
+#pragma acc set
int foo;
};
diff --git a/clang/test/SemaOpenACC/wait-construct-ast.cpp b/clang/test/SemaOpenACC/wait-construct-ast.cpp
new file mode 100644
index 0000000..58214f1
--- /dev/null
+++ b/clang/test/SemaOpenACC/wait-construct-ast.cpp
@@ -0,0 +1,225 @@
+// RUN: %clang_cc1 %s -fopenacc -ast-dump | FileCheck %s
+
+// Test this with PCH.
+// RUN: %clang_cc1 %s -fopenacc -emit-pch -o %t %s
+// RUN: %clang_cc1 %s -fopenacc -include-pch %t -ast-dump-all | FileCheck %s
+
+#ifndef PCH_HELPER
+#define PCH_HELPER
+
+int some_int();
+long some_long();
+
+void NormalFunc() {
+ // CHECK-LABEL: NormalFunc
+ // CHECK-NEXT: CompoundStmt
+
+#pragma acc wait async(some_int())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+#pragma acc wait() async
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+#pragma acc wait(some_int(), some_long()) if (some_int() < some_long())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+ // CHECK-NEXT: if clause
+ // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '<'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}} 'long'
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+#pragma acc wait(queues:some_int(), some_long())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+#pragma acc wait(devnum:some_int() : queues:some_int(), some_long())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+#pragma acc wait(devnum:some_int() : some_int(), some_long())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+}
+
+template<typename T>
+void TemplFunc(T t) {
+ // CHECK-LABEL: FunctionTemplateDecl {{.*}}TemplFunc
+ // CHECK-NEXT: TemplateTypeParmDecl
+ // CHECK-NEXT: FunctionDecl{{.*}}TemplFunc
+ // CHECK-NEXT: ParmVarDecl{{.*}} t 'T'
+ // CHECK-NEXT: CompoundStmt
+
+#pragma acc wait async(T::value)
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+#pragma acc wait() async
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+#pragma acc wait(t, T::value) if (T::value > t)
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue ParmVar{{.*}} 't' 'T'
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+ // CHECK-NEXT: if clause
+ // CHECK-NEXT: BinaryOperator{{.*}} '<dependent type>' '>'
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+ // CHECK-NEXT: DeclRefExpr{{.*}} 'T' lvalue ParmVar{{.*}} 't' 'T'
+#pragma acc wait(queues:typename T::IntTy{}, T::value) if (typename T::IntTy{} < typename T::ShortTy{})
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::IntTy' list
+ // CHECK-NEXT: InitListExpr
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+ // CHECK-NEXT: if clause
+ // CHECK-NEXT: BinaryOperator{{.*}} '<dependent type>' '<'
+ // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::IntTy' list
+ // CHECK-NEXT: InitListExpr
+ // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list
+ // CHECK-NEXT: InitListExpr
+#pragma acc wait(devnum:typename T::ShortTy{} : queues:some_int(), T::value)
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list
+ // CHECK-NEXT: InitListExpr
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+#pragma acc wait(devnum:typename T::ShortTy{} : T::value, some_long())
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CXXUnresolvedConstructExpr{{.*}}'typename T::ShortTy' list
+ // CHECK-NEXT: InitListExpr
+ // CHECK-NEXT: DependentScopeDeclRefExpr{{.*}} '<dependent type>'
+ // CHECK-NEXT: NestedNameSpecifier{{.*}} 'T'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+
+ // Instantiation:
+ // CHECK-NEXT: FunctionDecl{{.*}} TemplFunc 'void (HasInt)' implicit_instantiation
+ // CHECK-NEXT: TemplateArgument type 'HasInt'
+ // CHECK-NEXT: RecordType{{.*}} 'HasInt'
+ // CHECK-NEXT: CXXRecord{{.*}} 'HasInt'
+ // CHECK-NEXT: ParmVarDecl{{.*}} t 'HasInt'
+ // CHECK-NEXT: CompoundStmt
+
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ //
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: async clause
+ //
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'char' <UserDefinedConversion>
+ // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char'
+ // CHECK-NEXT: MemberExpr{{.*}}.operator char
+ // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar{{.*}} 't' 'HasInt'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ // CHECK-NEXT: if clause
+ // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '>'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int' <IntegralCast>
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'char' <UserDefinedConversion>
+ // CHECK-NEXT: CXXMemberCallExpr{{.*}}'char'
+ // CHECK-NEXT: MemberExpr{{.*}}.operator char
+ // CHECK-NEXT: DeclRefExpr{{.*}} 'HasInt' lvalue ParmVar{{.*}} 't' 'HasInt'
+ //
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: <<<NULL>>
+ // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::IntTy':'int'
+ // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::IntTy':'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ // CHECK-NEXT: if clause
+ // CHECK-NEXT: BinaryOperator{{.*}} 'bool' '<'
+ // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::IntTy':'int'
+ // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::IntTy':'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short'
+ // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short'
+ //
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short'
+ // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short'
+ // CHECK-NEXT: CallExpr{{.*}}'int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_int'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ //
+ // CHECK-NEXT: OpenACCWaitConstruct{{.*}}wait
+ // CHECK-NEXT: CXXFunctionalCastExpr{{.*}}'typename HasInt::ShortTy':'short'
+ // CHECK-NEXT: InitListExpr{{.*}}'typename HasInt::ShortTy':'short'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'int'
+ // CHECK-NEXT: DeclRefExpr{{.*}}'value' 'const int'
+ // CHECK-NEXT: NestedNameSpecifier {{.*}}'HasInt'
+ // CHECK-NEXT: CallExpr{{.*}}'long'
+ // CHECK-NEXT: ImplicitCastExpr{{.*}}'long (*)()' <FunctionToPointerDecay>
+ // CHECK-NEXT: DeclRefExpr{{.*}}'some_long'
+}
+
+struct HasInt {
+ using IntTy = int;
+ using ShortTy = short;
+ static constexpr int value = 1;
+
+ operator char();
+};
+void use() {
+ TemplFunc(HasInt{});
+}
+#endif
diff --git a/clang/test/SemaOpenACC/wait-construct.cpp b/clang/test/SemaOpenACC/wait-construct.cpp
new file mode 100644
index 0000000..a68fc54
--- /dev/null
+++ b/clang/test/SemaOpenACC/wait-construct.cpp
@@ -0,0 +1,95 @@
+// RUN: %clang_cc1 %s -fopenacc -verify
+
+struct NotConvertible{} NC;
+short getS();
+int getI();
+
+struct AmbiguousConvert{
+ operator int(); // #AMBIG_INT
+ operator short(); // #AMBIG_SHORT
+ operator float();
+} Ambiguous;
+
+struct ExplicitConvertOnly {
+ explicit operator int() const; // #EXPL_CONV
+} Explicit;
+
+void uses() {
+ int arr[5];
+#pragma acc wait(getS(), getI())
+#pragma acc wait(devnum:getS(): getI())
+#pragma acc wait(devnum:getS(): queues: getI(), getS())
+#pragma acc wait(devnum:getS(): getI(), getS())
+
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
+#pragma acc wait(devnum:NC : 5)
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
+#pragma acc wait(devnum:5 : NC)
+ // expected-error@+3{{OpenACC directive 'wait' requires expression of integer type ('int[5]' invalid)}}
+ // expected-error@+2{{OpenACC directive 'wait' requires expression of integer type ('int[5]' invalid)}}
+ // expected-error@+1{{OpenACC directive 'wait' requires expression of integer type ('struct NotConvertible' invalid)}}
+#pragma acc wait(devnum:arr : queues: arr, NC, 5)
+
+ // expected-error@+3{{multiple conversions from expression type 'struct AmbiguousConvert' to an integral type}}
+ // expected-note@#AMBIG_INT{{conversion to integral type 'int'}}
+ // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}}
+#pragma acc wait(Ambiguous)
+
+ // expected-error@+2{{OpenACC integer expression type 'struct ExplicitConvertOnly' requires explicit conversion to 'int'}}
+ // expected-note@#EXPL_CONV{{conversion to integral type 'int'}}
+#pragma acc wait(4, Explicit, 5)
+
+ // expected-error@+1{{use of undeclared identifier 'queues'}}
+#pragma acc wait(devnum: queues: 5)
+
+#pragma acc wait async
+#pragma acc wait async(getI())
+ // expected-error@+1{{OpenACC clause 'async' requires expression of integer type ('struct NotConvertible' invalid)}}
+#pragma acc wait async(NC)
+
+#pragma acc wait if(getI() < getS())
+ // expected-error@+1{{value of type 'struct NotConvertible' is not contextually convertible to 'bool'}}
+#pragma acc wait if(NC)
+
+}
+
+template<typename T>
+void TestInst() {
+ // expected-error@+4{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}}
+ // expected-note@#INST{{in instantiation of function template specialization}}
+ // expected-note@#AMBIG_INT{{conversion to integral type 'int'}}
+ // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}}
+#pragma acc wait(devnum:T::value :queues:T::ACValue)
+
+ // expected-error@+5{{OpenACC integer expression type 'const ExplicitConvertOnly' requires explicit conversion to 'int'}}
+ // expected-note@#EXPL_CONV{{conversion to integral type 'int'}}
+ // expected-error@+3{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}}
+ // expected-note@#AMBIG_INT{{conversion to integral type 'int'}}
+ // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}}
+#pragma acc wait(devnum:T::EXValue :queues:T::ACValue)
+
+ // expected-error@+1{{no member named 'Invalid' in 'HasInt'}}
+#pragma acc wait(queues: T::Invalid, T::Invalid2)
+
+ // expected-error@+3{{multiple conversions from expression type 'const AmbiguousConvert' to an integral type}}
+ // expected-note@#AMBIG_INT{{conversion to integral type 'int'}}
+ // expected-note@#AMBIG_SHORT{{conversion to integral type 'short'}}
+#pragma acc wait async(T::ACValue)
+
+#pragma acc wait if(T::value < T{})
+ // expected-error@+1{{value of type 'const ExplicitConvertOnly' is not contextually convertible to 'bool'}}
+#pragma acc wait if(T::EXValue)
+}
+
+struct HasInt {
+ using IntTy = int;
+ using ShortTy = short;
+ static constexpr int value = 1;
+ static constexpr AmbiguousConvert ACValue;
+ static constexpr ExplicitConvertOnly EXValue;
+
+ operator char();
+};
+void Inst() {
+ TestInst<HasInt>(); // #INST
+}
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 7015821..9bdc4c9 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2189,6 +2189,7 @@ public:
void VisitOpenACCEnterDataConstruct(const OpenACCEnterDataConstruct *D);
void VisitOpenACCExitDataConstruct(const OpenACCExitDataConstruct *D);
void VisitOpenACCHostDataConstruct(const OpenACCHostDataConstruct *D);
+ void VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *D);
void VisitOMPExecutableDirective(const OMPExecutableDirective *D);
void VisitOMPLoopBasedDirective(const OMPLoopBasedDirective *D);
void VisitOMPLoopDirective(const OMPLoopDirective *D);
@@ -3632,6 +3633,12 @@ void EnqueueVisitor::VisitOpenACCHostDataConstruct(
EnqueueChildren(Clause);
}
+void EnqueueVisitor::VisitOpenACCWaitConstruct(const OpenACCWaitConstruct *C) {
+ EnqueueChildren(C);
+ for (auto *Clause : C->clauses())
+ EnqueueChildren(Clause);
+}
+
void EnqueueVisitor::VisitAnnotateAttr(const AnnotateAttr *A) {
EnqueueChildren(A);
}
@@ -6394,6 +6401,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
return cxstring::createRef("OpenACCExitDataConstruct");
case CXCursor_OpenACCHostDataConstruct:
return cxstring::createRef("OpenACCHostDataConstruct");
+ case CXCursor_OpenACCWaitConstruct:
+ return cxstring::createRef("OpenACCWaitConstruct");
}
llvm_unreachable("Unhandled CXCursorKind");
diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 26935c4..b9fd3b4 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -900,6 +900,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
case Stmt::OpenACCHostDataConstructClass:
K = CXCursor_OpenACCHostDataConstruct;
break;
+ case Stmt::OpenACCWaitConstructClass:
+ K = CXCursor_OpenACCWaitConstruct;
+ break;
case Stmt::OMPTargetParallelGenericLoopDirectiveClass:
K = CXCursor_OMPTargetParallelGenericLoopDirective;
break;
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index eeb857a..e892f10 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -14503,10 +14503,13 @@ TEST_F(FormatTest, PullTrivialFunctionDefinitionsIntoSingleLine) {
FormatStyle DoNotMergeNoColumnLimit = NoColumnLimit;
DoNotMergeNoColumnLimit.AllowShortFunctionsOnASingleLine =
FormatStyle::SFS_None;
- verifyFormat("A()\n"
- " : b(0) {\n"
+ verifyFormat("A() : b(0) {\n"
"}",
- "A():b(0){}", DoNotMergeNoColumnLimit);
+ DoNotMergeNoColumnLimit);
+ verifyNoChange("A()\n"
+ " : b(0) {\n"
+ "}",
+ DoNotMergeNoColumnLimit);
verifyFormat("A()\n"
" : b(0) {\n"
"}",
diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp
index 38658fc..b2fb522 100644
--- a/clang/unittests/Format/TokenAnnotatorTest.cpp
+++ b/clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -2277,6 +2277,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) {
ASSERT_EQ(Tokens.size(), 21u) << Tokens;
EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown);
+ Tokens = annotate("Class<Type>{foo}->func(arg);");
+ ASSERT_EQ(Tokens.size(), 14u) << Tokens;
+ EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_Unknown); // Not FunctionLBrace
+ EXPECT_BRACE_KIND(Tokens[4], BK_BracedInit);
+ EXPECT_BRACE_KIND(Tokens[6], BK_BracedInit);
+ EXPECT_TOKEN(Tokens[7], tok::arrow, TT_Unknown);
+
auto Style = getLLVMStyle();
Style.StatementAttributeLikeMacros.push_back("emit");
Tokens = annotate("emit foo()->bar;", Style);
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index bc0020e..6899505 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -392,6 +392,10 @@ struct IntrinsicLibrary {
fir::ExtendedValue genSum(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
void genSignalSubroutine(llvm::ArrayRef<fir::ExtendedValue>);
void genSleep(llvm::ArrayRef<fir::ExtendedValue>);
+ void genSyncThreads(llvm::ArrayRef<fir::ExtendedValue>);
+ mlir::Value genSyncThreadsAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genSystem(std::optional<mlir::Type>,
mlir::ArrayRef<fir::ExtendedValue> args);
void genSystemClock(llvm::ArrayRef<fir::ExtendedValue>);
@@ -401,6 +405,9 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTranspose(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ void genThreadFence(llvm::ArrayRef<fir::ExtendedValue>);
+ void genThreadFenceBlock(llvm::ArrayRef<fir::ExtendedValue>);
+ void genThreadFenceSystem(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genTrim(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUbound(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 84fc77a..bbbacd2 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -646,6 +646,10 @@ static constexpr IntrinsicHandler handlers[]{
{"dim", asValue},
{"mask", asBox, handleDynamicOptional}}},
/*isElemental=*/false},
+ {"syncthreads", &I::genSyncThreads, {}, /*isElemental=*/false},
+ {"syncthreads_and", &I::genSyncThreadsAnd, {}, /*isElemental=*/false},
+ {"syncthreads_count", &I::genSyncThreadsCount, {}, /*isElemental=*/false},
+ {"syncthreads_or", &I::genSyncThreadsOr, {}, /*isElemental=*/false},
{"system",
&I::genSystem,
{{{"command", asBox}, {"exitstat", asBox, handleDynamicOptional}}},
@@ -655,6 +659,9 @@ static constexpr IntrinsicHandler handlers[]{
{{{"count", asAddr}, {"count_rate", asAddr}, {"count_max", asAddr}}},
/*isElemental=*/false},
{"tand", &I::genTand},
+ {"threadfence", &I::genThreadFence, {}, /*isElemental=*/false},
+ {"threadfence_block", &I::genThreadFenceBlock, {}, /*isElemental=*/false},
+ {"threadfence_system", &I::genThreadFenceSystem, {}, /*isElemental=*/false},
{"trailz", &I::genTrailz},
{"transfer",
&I::genTransfer,
@@ -7437,6 +7444,52 @@ IntrinsicLibrary::genSum(mlir::Type resultType,
resultType, args);
}
+// SYNCTHREADS
+void IntrinsicLibrary::genSyncThreads(llvm::ArrayRef<fir::ExtendedValue> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0";
+ mlir::FunctionType funcType =
+ mlir::FunctionType::get(builder.getContext(), {}, {});
+ auto funcOp = builder.createFunction(loc, funcName, funcType);
+ llvm::SmallVector<mlir::Value> noArgs;
+ builder.create<fir::CallOp>(loc, funcOp, noArgs);
+}
+
+// SYNCTHREADS_AND
+mlir::Value
+IntrinsicLibrary::genSyncThreadsAnd(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.and";
+ mlir::MLIRContext *context = builder.getContext();
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
+ auto funcOp = builder.createFunction(loc, funcName, ftype);
+ return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
+}
+
+// SYNCTHREADS_COUNT
+mlir::Value
+IntrinsicLibrary::genSyncThreadsCount(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.popc";
+ mlir::MLIRContext *context = builder.getContext();
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
+ auto funcOp = builder.createFunction(loc, funcName, ftype);
+ return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
+}
+
+// SYNCTHREADS_OR
+mlir::Value
+IntrinsicLibrary::genSyncThreadsOr(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.barrier0.or";
+ mlir::MLIRContext *context = builder.getContext();
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(context, {resultType}, {args[0].getType()});
+ auto funcOp = builder.createFunction(loc, funcName, ftype);
+ return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
+}
+
// SYSTEM
fir::ExtendedValue
IntrinsicLibrary::genSystem(std::optional<mlir::Type> resultType,
@@ -7567,6 +7620,38 @@ IntrinsicLibrary::genTranspose(mlir::Type resultType,
return readAndAddCleanUp(resultMutableBox, resultType, "TRANSPOSE");
}
+// THREADFENCE
+void IntrinsicLibrary::genThreadFence(llvm::ArrayRef<fir::ExtendedValue> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.gl";
+ mlir::FunctionType funcType =
+ mlir::FunctionType::get(builder.getContext(), {}, {});
+ auto funcOp = builder.createFunction(loc, funcName, funcType);
+ llvm::SmallVector<mlir::Value> noArgs;
+ builder.create<fir::CallOp>(loc, funcOp, noArgs);
+}
+
+// THREADFENCE_BLOCK
+void IntrinsicLibrary::genThreadFenceBlock(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.cta";
+ mlir::FunctionType funcType =
+ mlir::FunctionType::get(builder.getContext(), {}, {});
+ auto funcOp = builder.createFunction(loc, funcName, funcType);
+ llvm::SmallVector<mlir::Value> noArgs;
+ builder.create<fir::CallOp>(loc, funcOp, noArgs);
+}
+
+// THREADFENCE_SYSTEM
+void IntrinsicLibrary::genThreadFenceSystem(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ constexpr llvm::StringLiteral funcName = "llvm.nvvm.membar.sys";
+ mlir::FunctionType funcType =
+ mlir::FunctionType::get(builder.getContext(), {}, {});
+ auto funcOp = builder.createFunction(loc, funcName, funcType);
+ llvm::SmallVector<mlir::Value> noArgs;
+ builder.create<fir::CallOp>(loc, funcOp, noArgs);
+}
+
// TRIM
fir::ExtendedValue
IntrinsicLibrary::genTrim(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 0224ecf..1402bd4 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -18,27 +18,27 @@ implicit none
! Synchronization Functions
interface
- attributes(device) subroutine syncthreads() bind(c, name='__syncthreads')
+ attributes(device) subroutine syncthreads()
end subroutine
end interface
public :: syncthreads
interface
- attributes(device) integer function syncthreads_and(value) bind(c, name='__syncthreads_and')
+ attributes(device) integer function syncthreads_and(value)
integer :: value
end function
end interface
public :: syncthreads_and
interface
- attributes(device) integer function syncthreads_count(value) bind(c, name='__syncthreads_count')
+ attributes(device) integer function syncthreads_count(value)
integer :: value
end function
end interface
public :: syncthreads_count
interface
- attributes(device) integer function syncthreads_or(value) bind(c, name='__syncthreads_or')
+ attributes(device) integer function syncthreads_or(value)
integer :: value
end function
end interface
@@ -54,19 +54,19 @@ implicit none
! Memory Fences
interface
- attributes(device) subroutine threadfence() bind(c, name='__threadfence')
+ attributes(device) subroutine threadfence()
end subroutine
end interface
public :: threadfence
interface
- attributes(device) subroutine threadfence_block() bind(c, name='__threadfence_block')
+ attributes(device) subroutine threadfence_block()
end subroutine
end interface
public :: threadfence_block
interface
- attributes(device) subroutine threadfence_system() bind(c, name='__threadfence_system')
+ attributes(device) subroutine threadfence_system()
end subroutine
end interface
public :: threadfence_system
diff --git a/flang/runtime/findloc.cpp b/flang/runtime/findloc.cpp
index d05e9c5..b9b1d7f 100644
--- a/flang/runtime/findloc.cpp
+++ b/flang/runtime/findloc.cpp
@@ -159,14 +159,9 @@ struct NumericFindlocHelper {
Terminator &terminator) const {
switch (targetCat) {
case TypeCategory::Integer:
- ApplyIntegerKind<
- HELPER<CAT, KIND, TypeCategory::Integer>::template Functor, void>(
- targetKind, terminator, result, x, target, kind, dim, mask, back,
- terminator);
- break;
case TypeCategory::Unsigned:
ApplyIntegerKind<
- HELPER<CAT, KIND, TypeCategory::Unsigned>::template Functor, void>(
+ HELPER<CAT, KIND, TypeCategory::Integer>::template Functor, void>(
targetKind, terminator, result, x, target, kind, dim, mask, back,
terminator);
break;
@@ -235,13 +230,8 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x,
RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value());
switch (xType->first) {
case TypeCategory::Integer:
- ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
- TotalNumericFindlocHelper>::template Functor,
- void>(xType->second, terminator, targetType->first, targetType->second,
- result, x, target, kind, 0, mask, back, terminator);
- break;
case TypeCategory::Unsigned:
- ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Unsigned,
+ ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
TotalNumericFindlocHelper>::template Functor,
void>(xType->second, terminator, targetType->first, targetType->second,
result, x, target, kind, 0, mask, back, terminator);
@@ -331,13 +321,8 @@ void RTDEF(FindlocDim)(Descriptor &result, const Descriptor &x,
RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value());
switch (xType->first) {
case TypeCategory::Integer:
- ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
- PartialNumericFindlocHelper>::template Functor,
- void>(xType->second, terminator, targetType->first, targetType->second,
- result, x, target, kind, dim, mask, back, terminator);
- break;
case TypeCategory::Unsigned:
- ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Unsigned,
+ ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
PartialNumericFindlocHelper>::template Functor,
void>(xType->second, terminator, targetType->first, targetType->second,
result, x, target, kind, dim, mask, back, terminator);
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 1331b644..2042bbb 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -17,20 +17,20 @@ attributes(global) subroutine devsub()
end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
-! CHECK: fir.call @__syncthreads()
+! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
! CHECK: fir.call @__syncwarp(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> ()
-! CHECK: fir.call @__threadfence()
-! CHECK: fir.call @__threadfence_block()
-! CHECK: fir.call @__threadfence_system()
-! CHECK: %{{.*}} = fir.call @__syncthreads_and(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32
-! CHECK: %{{.*}} = fir.call @__syncthreads_count(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32
-! CHECK: %{{.*}} = fir.call @__syncthreads_or(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> i32
+! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
+! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
+! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1_i32_0) fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1_i32_1) fastmath<contract> : (i32) -> i32
+! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1_i32_2) fastmath<contract> : (i32) -> i32
-! CHECK: func.func private @__syncthreads() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads", fir.proc_attrs = #fir.proc_attrs<bind_c>}
+! CHECK: func.func private @llvm.nvvm.barrier0()
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__threadfence() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__threadfence_block() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence_block", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__threadfence_system() attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__threadfence_system", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__syncthreads_and(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_and", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__syncthreads_count(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_count", fir.proc_attrs = #fir.proc_attrs<bind_c>}
-! CHECK: func.func private @__syncthreads_or(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) -> i32 attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncthreads_or", fir.proc_attrs = #fir.proc_attrs<bind_c>}
+! CHECK: func.func private @llvm.nvvm.membar.gl()
+! CHECK: func.func private @llvm.nvvm.membar.cta()
+! CHECK: func.func private @llvm.nvvm.membar.sys()
+! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32
+! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32
+! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32
diff --git a/libcxx/include/__algorithm/adjacent_find.h b/libcxx/include/__algorithm/adjacent_find.h
index 7672890..2508250 100644
--- a/libcxx/include/__algorithm/adjacent_find.h
+++ b/libcxx/include/__algorithm/adjacent_find.h
@@ -11,10 +11,8 @@
#define _LIBCPP___ALGORITHM_ADJACENT_FIND_H
#include <__algorithm/comp.h>
-#include <__algorithm/iterator_operations.h>
#include <__config>
#include <__functional/identity.h>
-#include <__iterator/iterator_traits.h>
#include <__type_traits/invoke.h>
#include <__utility/move.h>
diff --git a/libcxx/include/__algorithm/binary_search.h b/libcxx/include/__algorithm/binary_search.h
index 79a5ec0..4940059 100644
--- a/libcxx/include/__algorithm/binary_search.h
+++ b/libcxx/include/__algorithm/binary_search.h
@@ -13,7 +13,6 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/lower_bound.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h
index cd8b067..a276bb9 100644
--- a/libcxx/include/__algorithm/equal.h
+++ b/libcxx/include/__algorithm/equal.h
@@ -20,7 +20,6 @@
#include <__type_traits/desugars_to.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
-#include <__type_traits/is_constant_evaluated.h>
#include <__type_traits/is_equality_comparable.h>
#include <__type_traits/is_volatile.h>
#include <__utility/move.h>
diff --git a/libcxx/include/__algorithm/equal_range.h b/libcxx/include/__algorithm/equal_range.h
index 4e74ad2..ff6f4f2 100644
--- a/libcxx/include/__algorithm/equal_range.h
+++ b/libcxx/include/__algorithm/equal_range.h
@@ -17,10 +17,6 @@
#include <__algorithm/upper_bound.h>
#include <__config>
#include <__functional/identity.h>
-#include <__iterator/advance.h>
-#include <__iterator/distance.h>
-#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
#include <__type_traits/is_constructible.h>
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index f29633f..5069a72 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -12,7 +12,6 @@
#include <__algorithm/min.h>
#include <__config>
#include <__fwd/bit_reference.h>
-#include <__iterator/iterator_traits.h>
#include <__memory/pointer_traits.h>
#include <__utility/convert_to_integral.h>
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 14b8a78..a05d507 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -24,7 +24,6 @@
#include <__type_traits/invoke.h>
#include <__type_traits/is_equality_comparable.h>
#include <__type_traits/is_integral.h>
-#include <__type_traits/is_same.h>
#include <__type_traits/is_signed.h>
#include <__utility/move.h>
#include <limits>
diff --git a/libcxx/include/__algorithm/find_end.h b/libcxx/include/__algorithm/find_end.h
index fc876d8..86b4a3e 100644
--- a/libcxx/include/__algorithm/find_end.h
+++ b/libcxx/include/__algorithm/find_end.h
@@ -15,7 +15,6 @@
#include <__config>
#include <__functional/identity.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__type_traits/invoke.h>
#include <__utility/pair.h>
diff --git a/libcxx/include/__algorithm/find_first_of.h b/libcxx/include/__algorithm/find_first_of.h
index 4a240f7..45ec133 100644
--- a/libcxx/include/__algorithm/find_first_of.h
+++ b/libcxx/include/__algorithm/find_first_of.h
@@ -12,7 +12,6 @@
#include <__algorithm/comp.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h
index 259e527..e08f583 100644
--- a/libcxx/include/__algorithm/for_each.h
+++ b/libcxx/include/__algorithm/for_each.h
@@ -14,7 +14,6 @@
#include <__config>
#include <__iterator/segmented_iterator.h>
#include <__ranges/movable_box.h>
-#include <__type_traits/enable_if.h>
#include <__utility/in_place.h>
#include <__utility/move.h>
diff --git a/libcxx/include/__algorithm/includes.h b/libcxx/include/__algorithm/includes.h
index a86102f..bc6c657 100644
--- a/libcxx/include/__algorithm/includes.h
+++ b/libcxx/include/__algorithm/includes.h
@@ -13,7 +13,6 @@
#include <__algorithm/comp_ref_type.h>
#include <__config>
#include <__functional/identity.h>
-#include <__iterator/iterator_traits.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
#include <__utility/move.h>
diff --git a/libcxx/include/__algorithm/inplace_merge.h b/libcxx/include/__algorithm/inplace_merge.h
index e0ccdab..69213cc 100644
--- a/libcxx/include/__algorithm/inplace_merge.h
+++ b/libcxx/include/__algorithm/inplace_merge.h
@@ -20,8 +20,6 @@
#include <__config>
#include <__cstddef/ptrdiff_t.h>
#include <__functional/identity.h>
-#include <__iterator/advance.h>
-#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/reverse_iterator.h>
#include <__memory/destruct_n.h>
diff --git a/libcxx/include/__algorithm/is_heap.h b/libcxx/include/__algorithm/is_heap.h
index fa668c1..dfe0620 100644
--- a/libcxx/include/__algorithm/is_heap.h
+++ b/libcxx/include/__algorithm/is_heap.h
@@ -13,7 +13,6 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/is_heap_until.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/is_permutation.h b/libcxx/include/__algorithm/is_permutation.h
index 8753e9f..1afb115 100644
--- a/libcxx/include/__algorithm/is_permutation.h
+++ b/libcxx/include/__algorithm/is_permutation.h
@@ -17,7 +17,6 @@
#include <__iterator/concepts.h>
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
-#include <__iterator/next.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
diff --git a/libcxx/include/__algorithm/is_sorted.h b/libcxx/include/__algorithm/is_sorted.h
index ff61a73..196ae0b 100644
--- a/libcxx/include/__algorithm/is_sorted.h
+++ b/libcxx/include/__algorithm/is_sorted.h
@@ -13,7 +13,6 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/is_sorted_until.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/is_sorted_until.h b/libcxx/include/__algorithm/is_sorted_until.h
index b64fb65..6066419 100644
--- a/libcxx/include/__algorithm/is_sorted_until.h
+++ b/libcxx/include/__algorithm/is_sorted_until.h
@@ -12,7 +12,6 @@
#include <__algorithm/comp.h>
#include <__algorithm/comp_ref_type.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/lower_bound.h b/libcxx/include/__algorithm/lower_bound.h
index 2144ba9..4fba674 100644
--- a/libcxx/include/__algorithm/lower_bound.h
+++ b/libcxx/include/__algorithm/lower_bound.h
@@ -19,7 +19,6 @@
#include <__iterator/iterator_traits.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_callable.h>
-#include <__type_traits/remove_reference.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/make_projected.h b/libcxx/include/__algorithm/make_projected.h
index 22cceb4c..4b54c50 100644
--- a/libcxx/include/__algorithm/make_projected.h
+++ b/libcxx/include/__algorithm/make_projected.h
@@ -9,16 +9,13 @@
#ifndef _LIBCPP___ALGORITHM_MAKE_PROJECTED_H
#define _LIBCPP___ALGORITHM_MAKE_PROJECTED_H
-#include <__concepts/same_as.h>
#include <__config>
#include <__functional/identity.h>
#include <__functional/invoke.h>
#include <__type_traits/decay.h>
#include <__type_traits/enable_if.h>
-#include <__type_traits/integral_constant.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_member_pointer.h>
-#include <__type_traits/is_same.h>
#include <__utility/declval.h>
#include <__utility/forward.h>
diff --git a/libcxx/include/__algorithm/merge.h b/libcxx/include/__algorithm/merge.h
index bad663c..ae859b7 100644
--- a/libcxx/include/__algorithm/merge.h
+++ b/libcxx/include/__algorithm/merge.h
@@ -13,7 +13,6 @@
#include <__algorithm/comp_ref_type.h>
#include <__algorithm/copy.h>
#include <__config>
-#include <__iterator/iterator_traits.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h
index 556bd42..a683679 100644
--- a/libcxx/include/__algorithm/mismatch.h
+++ b/libcxx/include/__algorithm/mismatch.h
@@ -27,7 +27,6 @@
#include <__type_traits/is_integral.h>
#include <__utility/move.h>
#include <__utility/pair.h>
-#include <__utility/unreachable.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 22637d4..9fa24c98 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -11,20 +11,18 @@
#define _LIBCPP___BIT_REFERENCE
#include <__algorithm/copy_n.h>
-#include <__algorithm/fill_n.h>
#include <__algorithm/min.h>
#include <__bit/countr.h>
-#include <__bit/invert_if.h>
-#include <__bit/popcount.h>
#include <__compare/ordering.h>
#include <__config>
+#include <__cstddef/size_t.h>
#include <__fwd/bit_reference.h>
#include <__iterator/iterator_traits.h>
#include <__memory/construct_at.h>
#include <__memory/pointer_traits.h>
#include <__type_traits/conditional.h>
+#include <__type_traits/is_constant_evaluated.h>
#include <__utility/swap.h>
-#include <cstring>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index fe7465a..9c821ea 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -16,6 +16,7 @@
#include <__bit/countl.h>
#include <__config>
#include <__cstddef/ptrdiff_t.h>
+#include <__cstddef/size_t.h>
#include <__functional/hash.h>
#include <__iterator/iterator_traits.h>
#include <__math/rounding_functions.h>
@@ -28,14 +29,12 @@
#include <__memory/unique_ptr.h>
#include <__new/launder.h>
#include <__type_traits/can_extract_key.h>
-#include <__type_traits/conditional.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_const.h>
#include <__type_traits/is_constructible.h>
#include <__type_traits/is_nothrow_assignable.h>
#include <__type_traits/is_nothrow_constructible.h>
-#include <__type_traits/is_pointer.h>
#include <__type_traits/is_reference.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_swappable.h>
@@ -45,7 +44,6 @@
#include <__utility/move.h>
#include <__utility/pair.h>
#include <__utility/swap.h>
-#include <cstring>
#include <limits>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__split_buffer b/libcxx/include/__split_buffer
index 5ee70220..9d7fd3e 100644
--- a/libcxx/include/__split_buffer
+++ b/libcxx/include/__split_buffer
@@ -14,7 +14,6 @@
#include <__algorithm/move.h>
#include <__algorithm/move_backward.h>
#include <__config>
-#include <__cstddef/size_t.h>
#include <__iterator/distance.h>
#include <__iterator/iterator_traits.h>
#include <__iterator/move_iterator.h>
@@ -24,7 +23,6 @@
#include <__memory/compressed_pair.h>
#include <__memory/pointer_traits.h>
#include <__memory/swap_allocator.h>
-#include <__type_traits/add_lvalue_reference.h>
#include <__type_traits/conditional.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/integral_constant.h>
diff --git a/libcxx/include/__tree b/libcxx/include/__tree
index f6ef21c..dfb205c 100644
--- a/libcxx/include/__tree
+++ b/libcxx/include/__tree
@@ -23,14 +23,12 @@
#include <__memory/swap_allocator.h>
#include <__memory/unique_ptr.h>
#include <__type_traits/can_extract_key.h>
-#include <__type_traits/conditional.h>
#include <__type_traits/enable_if.h>
#include <__type_traits/invoke.h>
#include <__type_traits/is_const.h>
#include <__type_traits/is_constructible.h>
#include <__type_traits/is_nothrow_assignable.h>
#include <__type_traits/is_nothrow_constructible.h>
-#include <__type_traits/is_pointer.h>
#include <__type_traits/is_same.h>
#include <__type_traits/is_swappable.h>
#include <__type_traits/remove_const_ref.h>
diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h
index ef96b74..883e24d 100644
--- a/llvm/include/llvm/ProfileData/MemProf.h
+++ b/llvm/include/llvm/ProfileData/MemProf.h
@@ -323,21 +323,6 @@ struct Frame {
<< " Column: " << Column << "\n"
<< " Inline: " << IsInlineFrame << "\n";
}
-
- // Return a hash value based on the contents of the frame. Here we use a
- // cryptographic hash function to minimize the chance of hash collisions. We
- // do persist FrameIds as part of memprof formats up to Version 2, inclusive.
- // However, the deserializer never calls this function; it uses FrameIds
- // merely as keys to look up Frames proper.
- inline FrameId hash() const {
- llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
- HashBuilder;
- HashBuilder.add(Function, LineOffset, Column, IsInlineFrame);
- llvm::BLAKE3Result<8> Hash = HashBuilder.final();
- FrameId Id;
- std::memcpy(&Id, Hash.data(), sizeof(Hash));
- return Id;
- }
};
// A type representing the index into the table of call stacks.
@@ -775,9 +760,6 @@ public:
}
};
-// Compute a CallStackId for a given call stack.
-CallStackId hashCallStack(ArrayRef<FrameId> CS);
-
namespace detail {
// "Dereference" the iterator from DenseMap or OnDiskChainedHashTable. We have
// to do so in one of two different ways depending on the type of the hash
@@ -1011,7 +993,7 @@ struct IndexedMemProfData {
llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>> CallStacks;
FrameId addFrame(const Frame &F) {
- const FrameId Id = F.hash();
+ const FrameId Id = hashFrame(F);
Frames.try_emplace(Id, F);
return Id;
}
@@ -1027,6 +1009,25 @@ struct IndexedMemProfData {
CallStacks.try_emplace(CSId, std::move(CS));
return CSId;
}
+
+private:
+ // Return a hash value based on the contents of the frame. Here we use a
+ // cryptographic hash function to minimize the chance of hash collisions. We
+ // do persist FrameIds as part of memprof formats up to Version 2, inclusive.
+ // However, the deserializer never calls this function; it uses FrameIds
+ // merely as keys to look up Frames proper.
+ FrameId hashFrame(const Frame &F) const {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
+ HashBuilder;
+ HashBuilder.add(F.Function, F.LineOffset, F.Column, F.IsInlineFrame);
+ llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+ FrameId Id;
+ std::memcpy(&Id, Hash.data(), sizeof(Hash));
+ return Id;
+ }
+
+ // Compute a CallStackId for a given call stack.
+ CallStackId hashCallStack(ArrayRef<FrameId> CS) const;
};
struct FrameStat {
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 119e091..d514485 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -1412,7 +1412,7 @@ CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) const {
}
// Capture branch regions specific to the function (excluding expansions).
for (const auto &CR : Function.CountedBranchRegions)
- if (FileIDs.test(CR.FileID) && (CR.FileID == CR.ExpandedFileID))
+ if (FileIDs.test(CR.FileID))
FileCoverage.BranchRegions.push_back(CR);
// Capture MCDC records specific to the function.
for (const auto &MR : Function.MCDCRecords)
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 1c240c3..15ab449 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -290,7 +290,7 @@ Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {
return Result;
}
-CallStackId hashCallStack(ArrayRef<FrameId> CS) {
+CallStackId IndexedMemProfData::hashCallStack(ArrayRef<FrameId> CS) const {
llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>
HashBuilder;
for (FrameId F : CS)
diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc
index c7b4c7a..aedcd9a 100644
--- a/llvm/lib/Support/Unix/Threading.inc
+++ b/llvm/lib/Support/Unix/Threading.inc
@@ -141,7 +141,7 @@ uint64_t llvm::get_threadid() {
#elif defined(__ANDROID__)
return uint64_t(gettid());
#elif defined(__linux__)
- return uint64_t(syscall(SYS_gettid));
+ return uint64_t(syscall(__NR_gettid));
#else
return uint64_t(pthread_self());
#endif
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index df0320f..f97ea40 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9292,6 +9292,7 @@ static bool isRenamedInGFX9(int Opcode) {
GENERATE_RENAMED_GFX9_CASES(AMDGPU::V_SUB_U32)
//
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
+ case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
case AMDGPU::V_INTERP_P2_F16:
case AMDGPU::V_MAD_F16_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b1f93a4..532df39 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3708,12 +3708,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>;
def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>;
def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+}
+
+let True16Predicate = UseFakeTrue16Insts in {
+def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
}
let OtherPredicates = [isGFX9Plus] in {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 34f90b3..df50113 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -339,8 +339,7 @@ let FPDPRounding = 1 in {
} // End Predicates = [Has16BitInsts, isGFX8Only]
let SubtargetPredicate = isGFX9Plus in {
- defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
- VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
+ defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
} // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
@@ -643,8 +642,8 @@ defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>;
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
-defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
-defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
+defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>;
+defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>;
defm V_PACK_B32_F16 : VOP3Inst_t16 <"v_pack_b32_f16", VOP_B32_F16_F16>;
@@ -1375,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR
let SubtargetPredicate = isGFX11Plus in {
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
- defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
- defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+ defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>;
+ defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>;
defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1720,7 +1719,7 @@ defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_
defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">;
defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">;
defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">;
-defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
+defm V_DIV_FIXUP_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x254, "v_div_fixup_f16", "V_DIV_FIXUP_F16_gfx9">;
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
defm V_AND_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x257>;
@@ -1731,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
-defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
-defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
+defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">;
+defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">;
defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
@@ -1755,8 +1754,8 @@ defm V_MIN_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x30
defm V_ADD_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30d, "v_add_nc_i16", "V_ADD_I16">;
defm V_SUB_NC_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x30e, "v_sub_nc_i16", "V_SUB_I16">;
defm V_PACK_B32_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x311, "v_pack_b32_f16">;
-defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x312, "V_CVT_PKNORM_I16_F16" , "v_cvt_pk_norm_i16_f16" >;
-defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x313, "V_CVT_PKNORM_U16_F16" , "v_cvt_pk_norm_u16_f16" >;
+defm V_CVT_PK_NORM_I16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x312, "v_cvt_pk_norm_i16_f16", "V_CVT_PKNORM_I16_F16", "v_cvt_pknorm_i16_f16">;
+defm V_CVT_PK_NORM_U16_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x313, "v_cvt_pk_norm_u16_f16", "V_CVT_PKNORM_U16_F16", "v_cvt_pknorm_u16_f16">;
defm V_SUB_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x325, "V_SUB_I32", "v_sub_nc_i32">;
defm V_ADD_NC_I32 : VOP3_Realtriple_with_name_gfx11_gfx12<0x326, "V_ADD_I32", "v_add_nc_i32">;
defm V_ADD_F64 : VOP3_Real_Base_gfx11<0x327>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 689f3b3..1afd687 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -405,16 +405,16 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
let OtherPredicates = [HasDot7Insts] in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_udot8, 1>;
} // End OtherPredicates = [HasDot7Insts]
let OtherPredicates = [HasDot1Insts] in {
defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4",
- VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;
+ VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>, int_amdgcn_sdot8, 1>;
} // End OtherPredicates = [HasDot1Insts]
def DOT2_BF16_Profile
@@ -433,7 +433,7 @@ defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16", DOT2_BF16_Profile,
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
let IsDOT = 1 in
- defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
+ defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED_NO_OPSEL>,
null_frag, 1>;
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 0f568ba..930ed9a 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1454,12 +1454,13 @@ class VOP3Features<bit Clamp, bit OpSel, bit Packed, bit MAI> {
bit IsMAI = MAI;
}
-def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
-def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
-def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
-def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
-def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
-def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
+def VOP3_REGULAR : VOP3Features<0, 0, 0, 0>;
+def VOP3_CLAMP : VOP3Features<1, 0, 0, 0>;
+def VOP3_OPSEL : VOP3Features<1, 1, 0, 0>;
+def VOP3_PACKED : VOP3Features<1, 1, 1, 0>;
+def VOP3_PACKED_NO_OPSEL : VOP3Features<1, 0, 1, 0>;
+def VOP3_MAI : VOP3Features<0, 0, 0, 1>;
+def VOP3_OPSEL_ONLY : VOP3Features<0, 1, 0, 0>;
// Packed is misleading, but it enables the appropriate op_sel
// modifiers.
@@ -1908,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName
multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> {
- defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>;
- defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>;
+ defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
}
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
index 1783e4a..2ab2daa 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.cpp
@@ -40,7 +40,7 @@ static bool findAndReplaceVectors(Module &M);
class DataScalarizerVisitor : public InstVisitor<DataScalarizerVisitor, bool> {
public:
DataScalarizerVisitor() : GlobalMap() {}
- bool visit(Function &F);
+ bool visit(Instruction &I);
// InstVisitor methods. They return true if the instruction was scalarized,
// false if nothing changed.
bool visitInstruction(Instruction &I) { return false; }
@@ -65,28 +65,11 @@ public:
private:
GlobalVariable *lookupReplacementGlobal(Value *CurrOperand);
DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
- SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;
- bool finish();
};
-bool DataScalarizerVisitor::visit(Function &F) {
+bool DataScalarizerVisitor::visit(Instruction &I) {
assert(!GlobalMap.empty());
- ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
- for (BasicBlock *BB : RPOT) {
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
- Instruction *I = &*II;
- bool Done = InstVisitor::visit(I);
- ++II;
- if (Done && I->getType()->isVoidTy())
- I->eraseFromParent();
- }
- }
- return finish();
-}
-
-bool DataScalarizerVisitor::finish() {
- RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
- return true;
+ return InstVisitor::visit(I);
}
GlobalVariable *
@@ -104,6 +87,20 @@ bool DataScalarizerVisitor::visitLoadInst(LoadInst &LI) {
unsigned NumOperands = LI.getNumOperands();
for (unsigned I = 0; I < NumOperands; ++I) {
Value *CurrOpperand = LI.getOperand(I);
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
+ if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+ GetElementPtrInst *OldGEP =
+ cast<GetElementPtrInst>(CE->getAsInstruction());
+ OldGEP->insertBefore(&LI);
+ IRBuilder<> Builder(&LI);
+ LoadInst *NewLoad =
+ Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
+ NewLoad->setAlignment(LI.getAlign());
+ LI.replaceAllUsesWith(NewLoad);
+ LI.eraseFromParent();
+ visitGetElementPtrInst(*OldGEP);
+ return true;
+ }
if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
LI.setOperand(I, NewGlobal);
}
@@ -114,32 +111,48 @@ bool DataScalarizerVisitor::visitStoreInst(StoreInst &SI) {
unsigned NumOperands = SI.getNumOperands();
for (unsigned I = 0; I < NumOperands; ++I) {
Value *CurrOpperand = SI.getOperand(I);
- if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand)) {
- SI.setOperand(I, NewGlobal);
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
+ if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
+ GetElementPtrInst *OldGEP =
+ cast<GetElementPtrInst>(CE->getAsInstruction());
+ OldGEP->insertBefore(&SI);
+ IRBuilder<> Builder(&SI);
+ StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
+ NewStore->setAlignment(SI.getAlign());
+ SI.replaceAllUsesWith(NewStore);
+ SI.eraseFromParent();
+ visitGetElementPtrInst(*OldGEP);
+ return true;
}
+ if (GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand))
+ SI.setOperand(I, NewGlobal);
}
return false;
}
bool DataScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
+
unsigned NumOperands = GEPI.getNumOperands();
+ GlobalVariable *NewGlobal = nullptr;
for (unsigned I = 0; I < NumOperands; ++I) {
Value *CurrOpperand = GEPI.getOperand(I);
- GlobalVariable *NewGlobal = lookupReplacementGlobal(CurrOpperand);
- if (!NewGlobal)
- continue;
- IRBuilder<> Builder(&GEPI);
-
- SmallVector<Value *, MaxVecSize> Indices;
- for (auto &Index : GEPI.indices())
- Indices.push_back(Index);
-
- Value *NewGEP =
- Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices);
-
- GEPI.replaceAllUsesWith(NewGEP);
- PotentiallyDeadInstrs.emplace_back(&GEPI);
+ NewGlobal = lookupReplacementGlobal(CurrOpperand);
+ if (NewGlobal)
+ break;
}
+ if (!NewGlobal)
+ return false;
+
+ IRBuilder<> Builder(&GEPI);
+ SmallVector<Value *, MaxVecSize> Indices;
+ for (auto &Index : GEPI.indices())
+ Indices.push_back(Index);
+
+ Value *NewGEP =
+ Builder.CreateGEP(NewGlobal->getValueType(), NewGlobal, Indices,
+ GEPI.getName(), GEPI.getNoWrapFlags());
+ GEPI.replaceAllUsesWith(NewGEP);
+ GEPI.eraseFromParent();
return true;
}
@@ -245,17 +258,13 @@ static bool findAndReplaceVectors(Module &M) {
for (User *U : make_early_inc_range(G.users())) {
if (isa<ConstantExpr>(U) && isa<Operator>(U)) {
ConstantExpr *CE = cast<ConstantExpr>(U);
- convertUsersOfConstantsToInstructions(CE,
- /*RestrictToFunc=*/nullptr,
- /*RemoveDeadConstants=*/false,
- /*IncludeSelf=*/true);
- }
- if (isa<Instruction>(U)) {
- Instruction *Inst = cast<Instruction>(U);
- Function *F = Inst->getFunction();
- if (F)
- Impl.visit(*F);
+ for (User *UCE : make_early_inc_range(CE->users())) {
+ if (Instruction *Inst = dyn_cast<Instruction>(UCE))
+ Impl.visit(*Inst);
+ }
}
+ if (Instruction *Inst = dyn_cast<Instruction>(U))
+ Impl.visit(*Inst);
}
}
}
diff --git a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
index 6077af9..53fc1c7 100644
--- a/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
+++ b/llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
@@ -162,11 +162,18 @@ bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) {
Value *CurrOpperand = LI.getOperand(I);
ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
- convertUsersOfConstantsToInstructions(CE,
- /*RestrictToFunc=*/nullptr,
- /*RemoveDeadConstants=*/false,
- /*IncludeSelf=*/true);
- return false;
+ GetElementPtrInst *OldGEP =
+ cast<GetElementPtrInst>(CE->getAsInstruction());
+ OldGEP->insertBefore(&LI);
+
+ IRBuilder<> Builder(&LI);
+ LoadInst *NewLoad =
+ Builder.CreateLoad(LI.getType(), OldGEP, LI.getName());
+ NewLoad->setAlignment(LI.getAlign());
+ LI.replaceAllUsesWith(NewLoad);
+ LI.eraseFromParent();
+ visitGetElementPtrInst(*OldGEP);
+ return true;
}
}
return false;
@@ -178,11 +185,17 @@ bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) {
Value *CurrOpperand = SI.getOperand(I);
ConstantExpr *CE = dyn_cast<ConstantExpr>(CurrOpperand);
if (CE && CE->getOpcode() == Instruction::GetElementPtr) {
- convertUsersOfConstantsToInstructions(CE,
- /*RestrictToFunc=*/nullptr,
- /*RemoveDeadConstants=*/false,
- /*IncludeSelf=*/true);
- return false;
+ GetElementPtrInst *OldGEP =
+ cast<GetElementPtrInst>(CE->getAsInstruction());
+ OldGEP->insertBefore(&SI);
+
+ IRBuilder<> Builder(&SI);
+ StoreInst *NewStore = Builder.CreateStore(SI.getValueOperand(), OldGEP);
+ NewStore->setAlignment(SI.getAlign());
+ SI.replaceAllUsesWith(NewStore);
+ SI.eraseFromParent();
+ visitGetElementPtrInst(*OldGEP);
+ return true;
}
}
return false;
@@ -315,10 +328,17 @@ bool DXILFlattenArraysVisitor::visit(Function &F) {
static void collectElements(Constant *Init,
SmallVectorImpl<Constant *> &Elements) {
// Base case: If Init is not an array, add it directly to the vector.
- if (!isa<ArrayType>(Init->getType())) {
+ auto *ArrayTy = dyn_cast<ArrayType>(Init->getType());
+ if (!ArrayTy) {
Elements.push_back(Init);
return;
}
+ unsigned ArrSize = ArrayTy->getNumElements();
+ if (isa<ConstantAggregateZero>(Init)) {
+ for (unsigned I = 0; I < ArrSize; ++I)
+ Elements.push_back(Constant::getNullValue(ArrayTy->getElementType()));
+ return;
+ }
// Recursive case: Process each element in the array.
if (auto *ArrayConstant = dyn_cast<ConstantArray>(Init)) {
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index a898b6a..c283b90 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -10,8 +10,11 @@
#include "DXILConstants.h"
#include "DXILIntrinsicExpansion.h"
#include "DXILOpBuilder.h"
+#include "DXILResourceAnalysis.h"
+#include "DXILShaderFlags.h"
#include "DirectX.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -763,6 +766,8 @@ PreservedAnalyses DXILOpLowering::run(Module &M, ModuleAnalysisManager &MAM) {
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DXILResourceBindingAnalysis>();
+ PA.preserve<DXILMetadataAnalysis>();
+ PA.preserve<ShaderFlagsAnalysis>();
return PA;
}
@@ -785,6 +790,9 @@ public:
AU.addRequired<DXILResourceTypeWrapperPass>();
AU.addRequired<DXILResourceBindingWrapperPass>();
AU.addPreserved<DXILResourceBindingWrapperPass>();
+ AU.addPreserved<DXILResourceMDWrapper>();
+ AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
};
char DXILOpLoweringLegacy::ID = 0;
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
index d6917dc..1e88963 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.cpp
@@ -14,16 +14,21 @@
#include "DXILShaderFlags.h"
#include "DirectX.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DXILResource.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::dxil;
-static void updateFunctionFlags(ComputedShaderFlags &CSF,
- const Instruction &I) {
+static void updateFunctionFlags(ComputedShaderFlags &CSF, const Instruction &I,
+ DXILResourceTypeMap &DRTM) {
if (!CSF.Doubles)
CSF.Doubles = I.getType()->isDoubleTy();
@@ -44,17 +49,34 @@ static void updateFunctionFlags(ComputedShaderFlags &CSF,
break;
}
}
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::dx_typedBufferLoad: {
+ dxil::ResourceTypeInfo &RTI =
+ DRTM[cast<TargetExtType>(II->getArgOperand(0)->getType())];
+ if (RTI.isTyped())
+ CSF.TypedUAVLoadAdditionalFormats |= RTI.getTyped().ElementCount > 1;
+ }
+ }
+ }
}
-void ModuleShaderFlags::initialize(const Module &M) {
+void ModuleShaderFlags::initialize(const Module &M, DXILResourceTypeMap &DRTM) {
+
// Collect shader flags for each of the functions
for (const auto &F : M.getFunctionList()) {
- if (F.isDeclaration())
+ if (F.isDeclaration()) {
+ assert(!F.getName().starts_with("dx.op.") &&
+ "DXIL Shader Flag analysis should not be run post-lowering.");
continue;
+ }
ComputedShaderFlags CSF;
for (const auto &BB : F)
for (const auto &I : BB)
- updateFunctionFlags(CSF, I);
+ updateFunctionFlags(CSF, I, DRTM);
// Insert shader flag mask for function F
FunctionFlags.push_back({&F, CSF});
// Update combined shader flags mask
@@ -101,8 +123,11 @@ AnalysisKey ShaderFlagsAnalysis::Key;
ModuleShaderFlags ShaderFlagsAnalysis::run(Module &M,
ModuleAnalysisManager &AM) {
+ DXILResourceTypeMap &DRTM = AM.getResult<DXILResourceTypeAnalysis>(M);
+
ModuleShaderFlags MSFI;
- MSFI.initialize(M);
+ MSFI.initialize(M, DRTM);
+
return MSFI;
}
@@ -129,11 +154,22 @@ PreservedAnalyses ShaderFlagsAnalysisPrinter::run(Module &M,
// ShaderFlagsAnalysis and ShaderFlagsAnalysisPrinterPass
bool ShaderFlagsAnalysisWrapper::runOnModule(Module &M) {
- MSFI.initialize(M);
+ DXILResourceTypeMap &DRTM =
+ getAnalysis<DXILResourceTypeWrapperPass>().getResourceTypeMap();
+
+ MSFI.initialize(M, DRTM);
return false;
}
+void ShaderFlagsAnalysisWrapper::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DXILResourceTypeWrapperPass>();
+}
+
char ShaderFlagsAnalysisWrapper::ID = 0;
-INITIALIZE_PASS(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis",
- "DXIL Shader Flag Analysis", true, true)
+INITIALIZE_PASS_BEGIN(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis",
+ "DXIL Shader Flag Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(DXILResourceTypeWrapperPass)
+INITIALIZE_PASS_END(ShaderFlagsAnalysisWrapper, "dx-shader-flag-analysis",
+ "DXIL Shader Flag Analysis", true, true)
diff --git a/llvm/lib/Target/DirectX/DXILShaderFlags.h b/llvm/lib/Target/DirectX/DXILShaderFlags.h
index 2d60137..67ddab3 100644
--- a/llvm/lib/Target/DirectX/DXILShaderFlags.h
+++ b/llvm/lib/Target/DirectX/DXILShaderFlags.h
@@ -26,6 +26,7 @@
namespace llvm {
class Module;
class GlobalVariable;
+class DXILResourceTypeMap;
namespace dxil {
@@ -84,7 +85,7 @@ struct ComputedShaderFlags {
};
struct ModuleShaderFlags {
- void initialize(const Module &);
+ void initialize(const Module &, DXILResourceTypeMap &DRTM);
const ComputedShaderFlags &getFunctionFlags(const Function *) const;
const ComputedShaderFlags &getCombinedFlags() const { return CombinedSFMask; }
@@ -135,9 +136,7 @@ public:
bool runOnModule(Module &M) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // namespace dxil
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index f175f16..5afe6b2 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -393,6 +393,7 @@ public:
AU.addPreserved<DXILResourceBindingWrapperPass>();
AU.addPreserved<DXILResourceMDWrapper>();
AU.addPreserved<DXILMetadataAnalysisWrapperPass>();
+ AU.addPreserved<ShaderFlagsAnalysisWrapper>();
}
bool runOnModule(Module &M) override {
diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
index d4e35fb..ecb1bf7 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp
@@ -99,8 +99,8 @@ public:
ScalarizerPassOptions DxilScalarOptions;
DxilScalarOptions.ScalarizeLoadStore = true;
addPass(createScalarizerPass(DxilScalarOptions));
- addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILTranslateMetadataLegacyPass());
+ addPass(createDXILOpLoweringLegacyPass());
addPass(createDXILPrepareModulePass());
}
};
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index a6be246..cd4b9f5 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -3369,6 +3369,13 @@ void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
break;
+ // If the caller was not successfully matched to a call in the IR/summary,
+ // there is no point in trying to clone for it as we can't update that call.
+ if (!CallerEdge->Caller->hasCall()) {
+ ++EI;
+ continue;
+ }
+
// Only need to process the ids along this edge pertaining to the given
// allocation.
auto CallerEdgeContextsForAlloc =
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 497fe4b..db89999 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -171,6 +171,11 @@ static cl::opt<std::string>
cl::desc("The default memprof options"),
cl::Hidden, cl::init(""));
+static cl::opt<bool>
+ SalvageStaleProfile("memprof-salvage-stale-profile",
+ cl::desc("Salvage stale MemProf profile"),
+ cl::init(false), cl::Hidden);
+
extern cl::opt<bool> MemProfReportHintedSizes;
static cl::opt<unsigned> MinMatchedColdBytePercent(
@@ -911,10 +916,38 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
return UndriftMaps;
}
+// Given a MemProfRecord, undrift all the source locations present in the
+// record in place.
+static void
+undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
+ memprof::MemProfRecord &MemProfRec) {
+ // Undrift a call stack in place.
+ auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
+ for (auto &F : CallStack) {
+ auto I = UndriftMaps.find(F.Function);
+ if (I == UndriftMaps.end())
+ continue;
+ auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
+ if (J == I->second.end())
+ continue;
+ auto &NewLoc = J->second;
+ F.LineOffset = NewLoc.LineOffset;
+ F.Column = NewLoc.Column;
+ }
+ };
+
+ for (auto &AS : MemProfRec.AllocSites)
+ UndriftCallStack(AS.CallStack);
+
+ for (auto &CS : MemProfRec.CallSites)
+ UndriftCallStack(CS);
+}
+
static void
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
const TargetLibraryInfo &TLI,
- std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
+ std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
+ DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
auto &Ctx = M.getContext();
// Previously we used getIRPGOFuncName() here. If F is local linkage,
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -962,6 +995,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
NumOfMemProfFunc++;
+ // If requested, undrfit MemProfRecord so that the source locations in it
+ // match those in the IR.
+ if (SalvageStaleProfile)
+ undriftMemProfRecord(UndriftMaps, *MemProfRec);
+
// Detect if there are non-zero column numbers in the profile. If not,
// treat all column numbers as 0 when matching (i.e. ignore any non-zero
// columns in the IR). The profiled binary might have been built with
@@ -1195,6 +1233,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
+ DenseMap<uint64_t, LocToLocMap> UndriftMaps;
+ if (SalvageStaleProfile)
+ UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
+
// Map from the stack has of each allocation context in the function profiles
// to the total profiled size (bytes), allocation type, and whether we matched
// it to an allocation in the IR.
@@ -1205,7 +1248,8 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
continue;
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
- readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
+ readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
+ UndriftMaps);
}
if (ClPrintMemProfMatchInfo) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d967813..8804989 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -21148,8 +21148,11 @@ bool SLPVectorizerPass::vectorizeCmpInsts(iterator_range<ItT> CmpInsts,
if (R.isDeleted(I))
continue;
for (Value *Op : I->operands())
- if (auto *RootOp = dyn_cast<Instruction>(Op))
+ if (auto *RootOp = dyn_cast<Instruction>(Op)) {
Changed |= vectorizeRootInstruction(nullptr, RootOp, BB, R);
+ if (R.isDeleted(I))
+ break;
+ }
}
// Try to vectorize operands as vector bundles.
for (CmpInst *I : CmpInsts) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index ca8614b..71f43ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -821,6 +821,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+VPlan::VPlan(Loop *L) {
+ setEntry(VPIRBasicBlock::fromBasicBlock(L->getLoopPreheader()));
+ ScalarHeader = VPIRBasicBlock::fromBasicBlock(L->getHeader());
+}
+
VPlan::~VPlan() {
if (Entry) {
VPValue DummyValue;
@@ -847,19 +852,17 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
PredicatedScalarEvolution &PSE,
bool RequiresScalarEpilogueCheck,
bool TailFolded, Loop *TheLoop) {
- VPIRBasicBlock *Entry =
- VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
- VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+ auto Plan = std::make_unique<VPlan>(TheLoop);
+ VPBlockBase *ScalarHeader = Plan->getScalarHeader();
+
// Connect entry only to vector preheader initially. Entry will also be
// connected to the scalar preheader later, during skeleton creation when
// runtime guards are added as needed. Note that when executing the VPlan for
// an epilogue vector loop, the original entry block here will be replaced by
// a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after
// generating code for the main vector loop.
- VPBlockUtils::connectBlocks(Entry, VecPreheader);
- VPIRBasicBlock *ScalarHeader =
- VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
- auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);
+ VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+ VPBlockUtils::connectBlocks(Plan->getEntry(), VecPreheader);
// Create SCEV and VPValue for the trip count.
// We use the symbolic max backedge-taken-count, which works also when
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index d2699588..8dd94a2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -3813,13 +3813,6 @@ class VPlan {
DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
public:
- /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and
- /// with \p ScalarHeader wrapping the original header of the scalar loop.
- VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
- : VPlan(Entry, ScalarHeader) {
- TripCount = TC;
- }
-
/// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
/// wrapping the original header of the scalar loop.
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
@@ -3829,6 +3822,18 @@ public:
"scalar header must be a leaf node");
}
+ /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and
+ /// with \p ScalarHeader wrapping the original header of the scalar loop.
+ VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
+ : VPlan(Entry, ScalarHeader) {
+ TripCount = TC;
+ }
+
+ /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
+ /// original preheader and scalar header of \p L, to be used as entry and
+ /// scalar header blocks of the new VPlan.
+ VPlan(Loop *L);
+
~VPlan();
void setEntry(VPBasicBlock *VPBB) {
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
index 8be7308..3feccff 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx90a.mir
@@ -746,7 +746,7 @@ name: smfma4x4_write_vgpr_dot_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
- $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
@@ -945,7 +945,7 @@ name: dot_write_vgpr_different_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
- $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN: V_DOT
@@ -955,7 +955,7 @@ name: dot_write_vgpr_different_dot_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
- $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN: V_DOT
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
index d59bcfb..5289198 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir
@@ -1071,7 +1071,7 @@ name: xdl_smfma4x4_write_vgpr_dot_write
body: |
bb.0:
$vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
- $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
@@ -1265,7 +1265,7 @@ name: dot_write_vgpr_different_dot_read_srcc
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
- $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN: V_DOT
@@ -1275,7 +1275,7 @@ name: dot_write_vgpr_different_dot_write
body: |
bb.0:
$vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
- $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
...
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN: V_DOT
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle.ll b/llvm/test/CodeGen/DirectX/CreateHandle.ll
index 234d4e0..c9969c9 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandle.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandle.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s
+; RUN: opt -S -passes=dxil-translate-metadata,dxil-op-lower %s | FileCheck %s
; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s
; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
index aa143df..425084e 100644
--- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -passes=dxil-op-lower,dxil-translate-metadata %s | FileCheck %s
+; RUN: opt -S -passes=dxil-translate-metadata,dxil-op-lower %s | FileCheck %s
; RUN: opt -S -passes=dxil-pretty-printer %s 2>&1 >/dev/null | FileCheck --check-prefix=CHECK-PRETTY %s
; CHECK-PRETTY: Type Format Dim ID HLSL Bind Count
diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll
new file mode 100644
index 0000000..b694739
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/ShaderFlags/typed-uav-load-additional-formats.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S --passes="print-dx-shader-flags" 2>&1 %s | FileCheck %s
+; RUN: llc %s --filetype=obj -o - | obj2yaml | FileCheck %s --check-prefix=CHECK-OBJ
+
+target triple = "dxil-pc-shadermodel6.7-library"
+
+; CHECK-OBJ: - Name: SFI0
+; CHECK-OBJ: Flags:
+; CHECK-OBJ: TypedUAVLoadAdditionalFormats: true
+
+; CHECK: Combined Shader Flags for Module
+; CHECK-NEXT: Shader Flags Value: 0x00002000
+
+; CHECK: Note: shader requires additional functionality:
+; CHECK: Typed UAV Load Additional Formats
+
+; CHECK: Function multicomponent : 0x00002000
+define <4 x float> @multicomponent() #0 {
+ %res = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+ %val = call <4 x float> @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %res, i32 0)
+ ret <4 x float> %val
+}
+
+; CHECK: Function onecomponent : 0x00000000
+define float @onecomponent() #0 {
+ %res = call target("dx.TypedBuffer", float, 1, 0, 0)
+ @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+ %val = call float @llvm.dx.typedBufferLoad(
+ target("dx.TypedBuffer", float, 1, 0, 0) %res, i32 0)
+ ret float %val
+}
+
+; CHECK: Function noload : 0x00000000
+define void @noload(<4 x float> %val) #0 {
+ %res = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
+ @llvm.dx.handle.fromBinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+ call void @llvm.dx.typedBufferStore(
+ target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %res, i32 0,
+ <4 x float> %val)
+ ret void
+}
+
+attributes #0 = { convergent norecurse nounwind "hlsl.export"}
diff --git a/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
new file mode 100644
index 0000000..3ae5832
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/flatten-bug-117273.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-flatten-arrays,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+@ZerroInitArr = internal constant [2 x [3 x float]] [[3 x float] zeroinitializer, [3 x float] [float 1.000000e+00, float 1.000000e+00, float 1.000000e+00]], align 16
+
+
+define internal void @main() {
+; CHECK-LABEL: define internal void @main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 1
+; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr [[TMP0]], align 16
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [24 x float], ptr @ZerroInitArr.1dim, i32 2
+; CHECK-NEXT: [[DOTI03:%.*]] = load float, ptr [[TMP1]], align 16
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 1
+ %.i0 = load float, ptr %0, align 16
+ %1 = getelementptr [8 x [3 x float]], ptr @ZerroInitArr, i32 0, i32 2
+ %.i03 = load float, ptr %1, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/llc-pipeline.ll b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
index b3f1609..b071557 100644
--- a/llvm/test/CodeGen/DirectX/llc-pipeline.ll
+++ b/llvm/test/CodeGen/DirectX/llc-pipeline.ll
@@ -1,5 +1,5 @@
-; RUN: llc -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | \
-; RUN: grep -v "Verify generated machine code" | FileCheck %s
+; RUN: llc -filetype=asm -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s --check-prefixes=CHECK,CHECK-ASM
+; RUN: llc -filetype=obj -mtriple=dxil-pc-shadermodel6.3-library -debug-pass=Structure < %s -o /dev/null 2>&1 | grep -v "Verify generated machine code" | FileCheck %s --check-prefixes=CHECK,CHECK-OBJ
; REQUIRES: asserts
@@ -7,6 +7,11 @@
; CHECK-NEXT: Target Library Information
; CHECK-NEXT: DXIL Resource Type Analysis
; CHECK-NEXT: Target Transform Information
+
+; CHECK-OBJ-NEXT: Machine Module Information
+; CHECK-OBJ-NEXT: Machine Branch Probability Analysis
+; CHECK-OBJ-NEXT: Create Garbage Collector Module Metadata
+
; CHECK-NEXT: ModulePass Manager
; CHECK-NEXT: DXIL Finalize Linkage
; CHECK-NEXT: DXIL Intrinsic Expansion
@@ -17,11 +22,19 @@
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Scalarize vector operations
; CHECK-NEXT: DXIL Resource Binding Analysis
-; CHECK-NEXT: DXIL Op Lowering
; CHECK-NEXT: DXIL resource Information
; CHECK-NEXT: DXIL Shader Flag Analysis
; CHECK-NEXT: DXIL Module Metadata analysis
; CHECK-NEXT: DXIL Translate Metadata
+; CHECK-NEXT: DXIL Op Lowering
; CHECK-NEXT: DXIL Prepare Module
-; CHECK-NEXT: DXIL Metadata Pretty Printer
-; CHECK-NEXT: Print Module IR
+
+; CHECK-ASM-NEXT: DXIL Metadata Pretty Printer
+; CHECK-ASM-NEXT: Print Module IR
+
+; CHECK-OBJ-NEXT: DXIL Embedder
+; CHECK-OBJ-NEXT: DXContainer Global Emitter
+; CHECK-OBJ-NEXT: FunctionPass Manager
+; CHECK-OBJ-NEXT: Lazy Machine Block Frequency Analysis
+; CHECK-OBJ-NEXT: Machine Optimization Remark Emitter
+; CHECK-OBJ-NEXT: DXIL Assembly Printer
diff --git a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
index 5972520..4e522c6 100644
--- a/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
+++ b/llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
@@ -21,7 +21,6 @@
; CHECK-NOT: @groushared2dArrayofVectors
; CHECK-NOT: @groushared2dArrayofVectors.scalarized
-
define <4 x i32> @load_array_vec_test() #0 {
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -33,18 +32,13 @@ define <4 x i32> @load_array_vec_test() #0 {
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
+; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
+; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
+; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([6 x float], ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
@@ -87,7 +81,7 @@ define <4 x i32> @load_vec_test() #0 {
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
+; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
@@ -121,18 +115,13 @@ define <4 x i32> @multid_load_test() #0 {
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
+; CHECK-NEXT: [[DOTI12:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 1) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
+; CHECK-NEXT: [[DOTI24:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 2) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
-; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
+; CHECK-NEXT: [[DOTI36:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) getelementptr inbounds ([36 x i32], ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1), i32 3) to ptr addrspace(3)
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
diff --git a/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
new file mode 100644
index 0000000..25dc2c3
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/scalar-bug-117273.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='dxil-data-scalarization,dxil-flatten-arrays,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
+
+
+@StaticArr = internal constant [8 x <3 x float>] [<3 x float> zeroinitializer, <3 x float> splat (float 5.000000e-01), <3 x float> <float 1.000000e+00, float 5.000000e-01, float 5.000000e-01>, <3 x float> <float 5.000000e-01, float 1.000000e+00, float 5.000000e-01>, <3 x float> <float 5.000000e-01, float 5.000000e-01, float 1.000000e+00>, <3 x float> <float 5.000000e-01, float 1.000000e+00, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 5.000000e-01, float 1.000000e+00>, <3 x float> <float 1.000000e+00, float 1.000000e+00, float 5.000000e-01>], align 16
+
+; Function Attrs: alwaysinline convergent mustprogress norecurse nounwind
+define internal void @main() #1 {
+; CHECK-LABEL: define internal void @main() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[DOTI0:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), align 16
+; CHECK-NEXT: [[DOTI1:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 1), align 4
+; CHECK-NEXT: [[DOTI2:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 1), i32 2), align 8
+; CHECK-NEXT: [[DOTI01:%.*]] = load float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), align 16
+; CHECK-NEXT: [[DOTI12:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 1), align 4
+; CHECK-NEXT: [[DOTI23:%.*]] = load float, ptr getelementptr (float, ptr getelementptr inbounds ([24 x float], ptr @StaticArr.scalarized.1dim, i32 2), i32 2), align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %arrayidx = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 1
+ %2 = load <3 x float>, ptr %arrayidx, align 16
+ %arrayidx2 = getelementptr inbounds [8 x <3 x float>], ptr @StaticArr, i32 0, i32 2
+ %3 = load <3 x float>, ptr %arrayidx2, align 16
+ ret void
+}
diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll
index a32db8b..ed1e910 100644
--- a/llvm/test/CodeGen/DirectX/scalar-load.ll
+++ b/llvm/test/CodeGen/DirectX/scalar-load.ll
@@ -2,10 +2,10 @@
; Make sure we can load groupshared, static vectors and arrays of vectors
-@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
-@"vecData" = external addrspace(3) global <4 x i32>, align 4
+@arrayofVecData = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
+@vecData = external addrspace(3) global <4 x i32>, align 4
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
-@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
+@groushared2dArrayofVectors = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
@@ -19,12 +19,12 @@
; CHECK-LABEL: load_array_vec_test
-define <4 x i32> @load_array_vec_test() #0 {
- ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
- %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
- %2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
- %3 = add <4 x i32> %1, %2
- ret <4 x i32> %3
+define <3 x float> @load_array_vec_test() #0 {
+ ; CHECK-COUNT-6: load float, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
+ %1 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
+ %2 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
+ %3 = fadd <3 x float> %1, %2
+ ret <3 x float> %3
}
; CHECK-LABEL: load_vec_test
@@ -36,8 +36,14 @@ define <4 x i32> @load_vec_test() #0 {
; CHECK-LABEL: load_static_array_of_vec_test
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
- ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
- ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
+ ; CHECK: getelementptr inbounds [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
+ ; CHECK: load i32, ptr {{.*}}, align 4
+ ; CHECK: getelementptr i32, ptr {{.*}}, i32 1
+ ; CHECK: load i32, ptr {{.*}}, align 4
+ ; CHECK: getelementptr i32, ptr {{.*}}, i32 2
+ ; CHECK: load i32, ptr {{.*}}, align 4
+ ; CHECK: getelementptr i32, ptr {{.*}}, i32 3
+ ; CHECK: load i32, ptr {{.*}}, align 4
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
%4 = load <4 x i32>, <4 x i32>* %3, align 4
ret <4 x i32> %4
diff --git a/llvm/test/MC/AMDGPU/dl-insts.s b/llvm/test/MC/AMDGPU/dl-insts.s
index 00e9bec..599734a 100644
--- a/llvm/test/MC/AMDGPU/dl-insts.s
+++ b/llvm/test/MC/AMDGPU/dl-insts.s
@@ -536,198 +536,6 @@ v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
// CHECK: encoding: [0x00,0x18,0xa7,0xd3,0x01,0x05,0x0e,0x1c]
v_dot2_u32_u16 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
-// CHECK: encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
-// CHECK: encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
-// CHECK: encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
-// CHECK: encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
-// CHECK: encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
-// CHECK: encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
-// CHECK: encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
-// CHECK: encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
-// CHECK: encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
-// CHECK: encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
-// CHECK: encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
-// CHECK: encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
-// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
-// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
-// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
-// CHECK: encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
//
// Test clamp.
diff --git a/llvm/test/MC/AMDGPU/gfx1030_err.s b/llvm/test/MC/AMDGPU/gfx1030_err.s
index 51498d3..87a0987 100644
--- a/llvm/test/MC/AMDGPU/gfx1030_err.s
+++ b/llvm/test/MC/AMDGPU/gfx1030_err.s
@@ -211,3 +211,365 @@ image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12
// missing dim
image_msaa_load v[1:4], v[5:7], s[8:15] dmask:0xf glc
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: missing dim operand
+
+// op_sel not allowed in dot opcodes with 4- or 8-bit packed data
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index d46f010..5aa19e5 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -1268,11 +1268,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
// GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_cvt_pk_norm_i16_f16 v5, v1, v2
-// GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l
+// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
-v_cvt_pk_norm_i16_f16 v5, v255, v255
-// GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l
+// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
v_cvt_pk_norm_i16_f16 v5, s1, s2
// GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1304,7 +1304,7 @@ v_cvt_pk_norm_i16_f16 v5, null, exec_lo
v_cvt_pk_norm_i16_f16 v5, -1, exec_hi
// GFX11: v_cvt_pk_norm_i16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x12,0xd7,0xc1,0xfe,0x00,0x00]
-v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+v_cvt_pk_norm_i16_f16 v5, 0.5, -m0
// GFX11: v_cvt_pk_norm_i16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x12,0xd7,0xf0,0xfa,0x00,0x40]
v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1313,11 +1313,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
-v_cvt_pk_norm_u16_f16 v5, v1, v2
-// GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l
+// GFX11: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
-v_cvt_pk_norm_u16_f16 v5, v255, v255
-// GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h
+// GFX11: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo|
+// GFX11: v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x12,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi|
+// GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l
+// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l
+// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
v_cvt_pk_norm_u16_f16 v5, s1, s2
// GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1349,7 +1361,7 @@ v_cvt_pk_norm_u16_f16 v5, null, exec_lo
v_cvt_pk_norm_u16_f16 v5, -1, exec_hi
// GFX11: v_cvt_pk_norm_u16_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x13,0xd7,0xc1,0xfe,0x00,0x00]
-v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 op_sel:[0,0,0]
+v_cvt_pk_norm_u16_f16 v5, 0.5, -m0
// GFX11: v_cvt_pk_norm_u16_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x13,0xd7,0xf0,0xfa,0x00,0x40]
v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
@@ -1358,6 +1370,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l
+// GFX11: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h
+// GFX11: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo|
+// GFX11: v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x13,0xd7,0xfd,0xd4,0x00,0x20]
+
+v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi|
+// GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| ; encoding: [0xff,0x03,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+
v_cvt_pk_u16_f32 v5, v1, v2
// GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
@@ -1583,53 +1607,77 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
// GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_div_fixup_f16 v5, v1, v2, s3
-// GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+v_div_fixup_f16 v5.l, v1.l, v2.l, s3
+// GFX11: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f16 v5.l, v255.l, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5.l, s105, s105, exec_lo
+// GFX11: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
-v_div_fixup_f16 v5, v255, s2, s105
-// GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, s1, v255, exec_hi
-// GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
-v_div_fixup_f16 v5, s105, s105, exec_lo
-// GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+v_div_fixup_f16 v5.l, m0, 0.5, m0
+// GFX11: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
-v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
-v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX11: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, m0, 0.5, m0
-// GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
-v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
-// GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
-v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
-// GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
+// GFX11: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
-// GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+v_div_fixup_f16 v5.l, v255.h, s2, s105
+// GFX11: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
-v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
+// GFX11: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
-v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
-// GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
-// GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1
+// GFX11: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 ; encoding: [0x05,0x02,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2
+// GFX11: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
v_div_fixup_f32 v5, v1, v2, s3
// GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
@@ -3674,50 +3722,62 @@ v_max_u16 v5.l, v255.l, v255.h
v_max_u16 v255.h, 0xfe0b, vcc_hi
// GFX11: v_max_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-v_maxmin_f16 v5, v1, v2, s3
-// GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+v_maxmin_f16 v5.l, v1.l, v2.l, s3
+// GFX11: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+
+v_maxmin_f16 v5.l, v255.l, s2, s105
+// GFX11: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+
+v_maxmin_f16 v5.l, s1, v255.l, exec_hi
+// GFX11: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
-v_maxmin_f16 v5, v255, s2, s105
-// GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+v_maxmin_f16 v5.l, s105, s105, exec_lo
+// GFX11: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
-v_maxmin_f16 v5, s1, v255, exec_hi
-// GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
-v_maxmin_f16 v5, s105, s105, exec_lo
-// GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_maxmin_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
-v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_maxmin_f16 v5.l, m0, 0.5, m0
+// GFX11: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
-v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
-v_maxmin_f16 v5, m0, 0.5, m0
-// GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo|
-// GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_maxmin_f16 v5, null, exec_lo, -|0xfe0b|
-// GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2
+// GFX11: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
-v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc|
-// GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
-v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2
-// GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
-v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
-// GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+v_maxmin_f16 v5.l, v255.h, s2, s105
+// GFX11: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
-v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
-// GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+v_maxmin_f16 v5.l, s1, v255.h, exec_hi
+// GFX11: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
+
+v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
v_maxmin_f32 v5, v1, v2, s3
// GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00]
@@ -4751,50 +4811,62 @@ v_min_u16 v5.l, v255.l, v255.h
v_min_u16 v255.h, 0xfe0b, vcc_hi
// GFX11: v_min_u16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-v_minmax_f16 v5, v1, v2, s3
-// GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+v_minmax_f16 v5.l, v1.l, v2.l, s3
+// GFX11: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+
+v_minmax_f16 v5.l, v255.l, s2, s105
+// GFX11: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+
+v_minmax_f16 v5.l, s1, v255.l, exec_hi
+// GFX11: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+
+v_minmax_f16 v5.l, s105, s105, exec_lo
+// GFX11: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX11: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
-v_minmax_f16 v5, v255, s2, s105
-// GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_minmax_f16 v5, s1, v255, exec_hi
-// GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX11: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
-v_minmax_f16 v5, s105, s105, exec_lo
-// GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+v_minmax_f16 v5.l, m0, 0.5, m0
+// GFX11: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
-v_minmax_f16 v5, vcc_lo, ttmp15, v3
-// GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
+v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX11: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
-v_minmax_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo|
+// GFX11: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
+v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b|
+// GFX11: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_minmax_f16 v5, m0, 0.5, m0
-// GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
+v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc|
+// GFX11: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_minmax_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
+v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2
+// GFX11: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
-v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo|
-// GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4
+// GFX11: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
-v_minmax_f16 v5, null, exec_lo, -|0xfe0b|
-// GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
-v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc|
-// GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_minmax_f16 v5.l, v255.h, s2, s105
+// GFX11: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
-v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2
-// GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
+v_minmax_f16 v5.l, s1, v255.h, exec_hi
+// GFX11: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
-v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4
-// GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
+v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX11: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2
-// GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp div:2
+// GFX11: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
v_minmax_f32 v5, v1, v2, s3
// GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
index 36d959f..62d6c8e 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s
@@ -1124,89 +1124,101 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound
v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3]
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
-v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1418,47 +1430,83 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13]
+
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -2576,47 +2624,92 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_
v_max_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
-v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
-v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
-v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
-v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff]
-v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff]
-v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13]
+
+v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13]
+
+v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -3620,47 +3713,92 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_
v_min_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
-v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
-v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
-v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
-v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
-v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
-v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
-v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff]
+
+v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13]
+
+v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13]
+
+v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX11: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30]
v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4757,32 +4895,33 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_
v_xor_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX11: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
-v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
index 479bd19e..75bbdbc 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s
@@ -628,29 +628,41 @@ v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x24,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x02,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -733,41 +745,74 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0]
+// GFX11: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x54,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x54,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -1582,41 +1627,80 @@ v_max_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
v_max_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x09,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
-v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x60,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05]
-v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x60,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x60,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x60,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2356,41 +2440,80 @@ v_min_u16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
v_min_u16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x40,0x0b,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
-v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x61,0xd6,0xea,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
-v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0xfe,0xa1,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xfa,0xc1,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, |v1.l|, -v2.l, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xf2,0x41,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x03,0x61,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX11: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x61,0xd6,0xea,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX11: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x61,0xd6,0xe9,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3057,32 +3180,33 @@ v_xor_b16_e64_dpp v5.l, v1.l, v2.h dpp8:[7,6,5,4,3,2,1,0] fi:1
v_xor_b16_e64_dpp v255.h, v255.l, v255.l dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX11: [0xff,0x40,0x64,0xd7,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
-v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
-v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX11: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
// GFX11: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s
new file mode 100644
index 0000000..451627e
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p_err.s
@@ -0,0 +1,219 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1100 %s 2>&1 | FileCheck --check-prefix=GFX11 --implicit-check-not=error: %s
+
+// op_sel not allowed in dot opcodes with 4- or 8-bit packed data
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index 62bebd0..0309b2e 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -1286,11 +1286,11 @@ v_cvt_pk_i16_i32 v5, src_scc, vcc_lo
v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi
// GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
-v_cvt_pk_norm_i16_f16 v5, v1, v2
-// GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
-v_cvt_pk_norm_i16_f16 v5, v255, v255
-// GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
v_cvt_pk_norm_i16_f16 v5, s1, s2
// GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1331,11 +1331,23 @@ v_cvt_pk_norm_i16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
-v_cvt_pk_norm_u16_f16 v5, v1, v2
-// GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
-v_cvt_pk_norm_u16_f16 v5, v255, v255
-// GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+v_cvt_pknorm_i16_f16 v5, v1.h, v2.l
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pknorm_i16_f16 v5, v255.l, v255.h
+// GFX12: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
v_cvt_pk_norm_u16_f16 v5, s1, s2
// GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1376,6 +1388,18 @@ v_cvt_pk_norm_u16_f16 v5, -src_scc, |vcc_lo| op_sel:[1,0,0]
v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0]
// GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pknorm_u16_f16 v5, v1.h, v2.l
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+v_cvt_pknorm_u16_f16 v5, v255.l, v255.h
+// GFX12: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
v_cvt_pk_u16_f32 v5, v1, v2
// GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
@@ -1601,50 +1625,62 @@ v_cvt_pk_norm_u16_f32 v5, -src_scc, |vcc_lo|
v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi|
// GFX12: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_div_fixup_f16 v5, v1, v2, s3
-// GFX12: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+v_div_fixup_f16 v5.l, v1.l, v2.l, s3
+// GFX12: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+
+v_div_fixup_f16 v5.l, v255.l, s2, s105
+// GFX12: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+v_div_fixup_f16 v5.l, s1, v255.l, exec_hi
+// GFX12: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+v_div_fixup_f16 v5.l, s105, s105, exec_lo
+// GFX12: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+
+v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l
+// GFX12: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
-v_div_fixup_f16 v5, v255, s2, s105
-// GFX12: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l
+// GFX12: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, s1, v255, exec_hi
-// GFX12: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15|
+// GFX12: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
-v_div_fixup_f16 v5, s105, s105, exec_lo
-// GFX12: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+v_div_fixup_f16 v5.l, m0, 0.5, m0
+// GFX12: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
-v_div_fixup_f16 v5, vcc_lo, ttmp15, v3
-// GFX12: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi
+// GFX12: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
-v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255
-// GFX12: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
+// GFX12: v_div_fixup_f16 v5.h, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
-v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15|
-// GFX12: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
+// GFX12: v_div_fixup_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, m0, 0.5, m0
-// GFX12: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
+// GFX12: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
-v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi
-// GFX12: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
+// GFX12: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
-v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1]
-// GFX12: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| op_sel:[1,1,1,1] ; encoding: [0x05,0x7d,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
+// GFX12: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
-v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[0,0,0,0]
-// GFX12: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
+// GFX12: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0]
-// GFX12: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+v_div_fixup_f16 v5.l, v255.h, s2, s105
+// GFX12: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
-v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0]
-// GFX12: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+v_div_fixup_f16 v5.l, s1, v255.h, exec_hi
+// GFX12: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
-v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
-// GFX12: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h
+// GFX12: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
-v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
-// GFX12: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null clamp
+// GFX12: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
v_div_fixup_f32 v5, v1, v2, s3
// GFX12: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
index e7b12ec..b769324 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s
@@ -1442,6 +1442,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1484,6 +1490,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
+
v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0]
// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
@@ -1700,53 +1712,68 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15
-// GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15
-// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
-// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30]
v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0]
// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -5203,20 +5230,20 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:
v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
-// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
+// GFX12: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
-// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf
// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
index b879e59..f76dd26 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s
@@ -840,6 +840,12 @@ v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x12,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -852,6 +858,12 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x13,0x13,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
@@ -939,47 +951,62 @@ v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1
v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0
// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x03,0x22,0xd7,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x06,0x54,0xd6,0xea,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 dpp8:[7,6,5,4,3,2,1,0] fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0x05,0x13,0x54,0xd6,0xea,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
-// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x87,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0
+// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0xc7,0x54,0xd6,0xe9,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -3472,20 +3499,20 @@ v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0
v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1
// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
-v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
-// GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0]
+// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
-v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
-// GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1
+// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0]
// GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x48,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s
index a7d42c6..5ed2091 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p_err.s
@@ -131,3 +131,221 @@ v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[1,0,0]
v_dot4_f32_bf8_bf8 v0, v1, v2, v3 neg_hi:[0,1,0]
// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: invalid neg_hi operand
+
+// op_sel not allowed in dot opcodes with 4- or 8-bit packed data
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_iu8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX12: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
diff --git a/llvm/test/MC/AMDGPU/gfx908_err.s b/llvm/test/MC/AMDGPU/gfx908_err.s
new file mode 100644
index 0000000..d39e9b5
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx908_err.s
@@ -0,0 +1,436 @@
+// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx908 %s 2>&1 | FileCheck --check-prefix=GFX908 --implicit-check-not=error: %s
+
+// op_sel not allowed in dot opcodes with 4- or 8-bit packed data
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX908: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err.s b/llvm/test/MC/AMDGPU/gfx90a_err.s
index 7b2acc6..a05c7fb 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err.s
@@ -239,3 +239,438 @@ scratch_load_lds_dword v2, off
ds_read_b32 v0, v1 gds
// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: gds modifier is not supported on this GPU
+
+// op_sel not allowed in dot opcodes with 4- or 8-bit packed data
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot4c_i32_i8 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[0,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,0]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
+v_dot8c_i32_i4 v0, v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1]
+// GFX90A: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand.
+
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index 857da7c..9790473 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1375,10 +1375,16 @@
# GFX11: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00
# GFX11: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1419,11 +1425,41 @@
0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX11: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
+0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00
-# GFX11: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00
-# GFX11: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00
# GFX11: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1464,6 +1500,30 @@
0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX11: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
+0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00
# GFX11: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
@@ -1690,53 +1750,125 @@
# GFX11: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00
-# GFX11: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01
-# GFX11: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01
-# GFX11: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01
-# GFX11: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04
-# GFX11: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
-# GFX11: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1
-# GFX11: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01
-# GFX11: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01
-# GFX11: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1
-# GFX11: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00
-# GFX11: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3
-# GFX11: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43
-# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23
-# GFX11: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W32-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W32-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W64-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W64-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
-# GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
# CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b
-# GFX11: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+
+0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01
+# W32-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01
+# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00
# GFX11: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
@@ -4157,49 +4289,118 @@
# W64-FAKE16: v_max_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x09,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00
-# GFX11: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+# W32-REAL16: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+# W32-FAKE16: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+# W64-REAL16: v_maxmin_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
+# W64-FAKE16: v_maxmin_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x60,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01
-# GFX11: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W32-REAL16: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x60,0xd6,0xff,0x05,0xa4,0x01]
0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01
-# GFX11: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W32-REAL16: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x60,0xd6,0x01,0xfe,0xff,0x01]
0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01
-# GFX11: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-REAL16: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x60,0xd6,0x69,0xd2,0xf8,0x01]
0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04
-# GFX11: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-REAL16: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-FAKE16: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-REAL16: v_maxmin_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-FAKE16: v_maxmin_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x60,0xd6,0x6a,0xf6,0x0c,0x04]
0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
-# GFX11: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1
-# GFX11: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-REAL16: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-FAKE16: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-REAL16: v_maxmin_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-FAKE16: v_maxmin_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x60,0xd6,0x7b,0xfa,0xed,0xe1]
0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01
-# GFX11: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-REAL16: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x60,0xd6,0x7d,0xe0,0xf5,0x01]
0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01
-# GFX11: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+# W32-REAL16: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x60,0xd6,0x7e,0x82,0xad,0x01]
0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1
-# GFX11: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-REAL16: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-FAKE16: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-REAL16: v_maxmin_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-FAKE16: v_maxmin_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x60,0xd6,0x7f,0xf8,0xa8,0xa1]
0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00
-# GFX11: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_maxmin_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x60,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3
-# GFX11: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-REAL16: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-FAKE16: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-REAL16: v_maxmin_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-FAKE16: v_maxmin_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x60,0xd6,0xc1,0xfe,0xf4,0xc3]
0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b
-# GFX11: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-REAL16: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-FAKE16: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-REAL16: v_maxmin_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-FAKE16: v_maxmin_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x60,0xd6,0xf0,0xfa,0xc0,0x4b]
0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33
-# GFX11: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+# W32-REAL16: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+# W32-FAKE16: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+# W64-REAL16: v_maxmin_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
+# W64-FAKE16: v_maxmin_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x60,0xd6,0xfd,0xd4,0x04,0x33]
0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00
-# GFX11: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_maxmin_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01
+# W32-REAL16: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x60,0xd6,0xff,0x05,0xa4,0x01]
+
+0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01
+# W32-REAL16: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_maxmin_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_maxmin_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x60,0xd6,0x01,0xfe,0xff,0x01]
+
+0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_maxmin_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_maxmin_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x60,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00
# GFX11: v_maxmin_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5e,0xd6,0x01,0x05,0x0e,0x00]
@@ -5647,49 +5848,118 @@
# W64-FAKE16: v_min_u16 v255, 0xfe0b, vcc_hi ; encoding: [0xff,0x00,0x0b,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00
-# GFX11: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+# W32-REAL16: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+# W32-FAKE16: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+# W64-REAL16: v_minmax_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
+# W64-FAKE16: v_minmax_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x61,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01
-# GFX11: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W32-REAL16: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_minmax_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x61,0xd6,0xff,0x05,0xa4,0x01]
0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01
-# GFX11: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W32-REAL16: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x61,0xd6,0x01,0xfe,0xff,0x01]
0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01
-# GFX11: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-REAL16: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-FAKE16: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-FAKE16: v_minmax_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x61,0xd6,0x69,0xd2,0xf8,0x01]
0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04
-# GFX11: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-REAL16: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-FAKE16: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-REAL16: v_minmax_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-FAKE16: v_minmax_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x61,0xd6,0x6a,0xf6,0x0c,0x04]
0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
-# GFX11: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1
-# GFX11: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-REAL16: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-FAKE16: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-REAL16: v_minmax_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-FAKE16: v_minmax_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x61,0xd6,0x7b,0xfa,0xed,0xe1]
0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01
-# GFX11: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-REAL16: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-FAKE16: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-FAKE16: v_minmax_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x61,0xd6,0x7d,0xe0,0xf5,0x01]
0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01
-# GFX11: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
+# W32-REAL16: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
+# W32-FAKE16: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
+# W64-FAKE16: v_minmax_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x61,0xd6,0x7e,0x82,0xad,0x01]
0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1
-# GFX11: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-REAL16: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-FAKE16: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-REAL16: v_minmax_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-FAKE16: v_minmax_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x61,0xd6,0x7f,0xf8,0xa8,0xa1]
0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00
-# GFX11: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_minmax_f16 v5.l, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_minmax_f16 v5, null, exec_lo, -|0xfe0b| ; encoding: [0x05,0x04,0x61,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3
-# GFX11: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-REAL16: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-FAKE16: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-REAL16: v_minmax_f16 v5.l, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-FAKE16: v_minmax_f16 v5, -1, -|exec_hi|, -|src_scc| ; encoding: [0x05,0x06,0x61,0xd6,0xc1,0xfe,0xf4,0xc3]
0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b
-# GFX11: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-REAL16: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W32-FAKE16: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-REAL16: v_minmax_f16 v5.l, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
+# W64-FAKE16: v_minmax_f16 v5, 0.5, -m0, 0.5 mul:2 ; encoding: [0x05,0x00,0x61,0xd6,0xf0,0xfa,0xc0,0x4b]
0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33
-# GFX11: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
+# W32-REAL16: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
+# W32-FAKE16: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
+# W64-REAL16: v_minmax_f16 v5.l, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
+# W64-FAKE16: v_minmax_f16 v5, -src_scc, |vcc_lo|, -1 mul:4 ; encoding: [0x05,0x02,0x61,0xd6,0xfd,0xd4,0x04,0x33]
0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00
-# GFX11: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_minmax_f16 v255.l, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null clamp div:2 ; encoding: [0xff,0x83,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
+0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01
+# W32-REAL16: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_minmax_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_minmax_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x61,0xd6,0xff,0x05,0xa4,0x01]
+
+0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01
+# W32-REAL16: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_minmax_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_minmax_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x61,0xd6,0x01,0xfe,0xff,0x01]
+
+0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_minmax_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_minmax_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+
+0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_minmax_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_minmax_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2 ; encoding: [0xff,0xc3,0x61,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00
# GFX11: v_minmax_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x5f,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
index 93992cb..4dc4f46 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt
@@ -2113,46 +2113,118 @@
# W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x60,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x60,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x60,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x60,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff
-# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x60,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01
-# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x60,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x60,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30
-# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+
+0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x60,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x60,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x60,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x60,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+
+0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -2833,46 +2905,118 @@
# W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x61,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x61,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff]
0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x61,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x61,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff
-# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x61,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01
-# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x61,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01]
0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13
-# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x61,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13]
0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30
-# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+
+0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# W32-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x61,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x61,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x61,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01]
+
+0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x61,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13]
+
+0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
@@ -4113,130 +4257,322 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff
-# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01
-# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+
+0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
-# GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
index a339d55..dcf57ee 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt
@@ -1141,40 +1141,106 @@
# W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x60,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x60,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x60,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x60,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x60,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x60,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05
-# GFX11: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x60,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00
-# GFX11: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_maxmin_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x60,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x60,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x60,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05
+# W32-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_maxmin_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x60,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00
+# W32-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_maxmin_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_maxmin_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x60,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX11: v_maxmin_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -1585,40 +1651,106 @@
# W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x61,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, |v1|, v2, -ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x61,0xd6,0xe9,0x04,0xee,0x81,0x01,0x77,0x39,0x05]
0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x61,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, v2, |exec_lo| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x04,0x61,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, null dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x03,0x61,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x05,0x61,0xd6,0xe9,0x04,0x06,0xab,0x01,0x77,0x39,0x05]
0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05
-# GFX11: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x06,0x61,0xd6,0xe9,0x04,0xc2,0xd3,0x01,0x77,0x39,0x05]
0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00
-# GFX11: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_minmax_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x87,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+
+0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# W32-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x61,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x61,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x61,0xd6,0xe9,0x04,0x06,0x2b,0x01,0x77,0x39,0x05]
+
+0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05
+# W32-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_minmax_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_minmax_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x61,0xd6,0xe9,0x04,0xc2,0x73,0x01,0x77,0x39,0x05]
+
+0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00
+# W32-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_minmax_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_minmax_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x61,0xd6,0xea,0xfe,0xf7,0xfb,0xff,0x00,0x00,0x00]
0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX11: v_minmax_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
@@ -2349,70 +2481,202 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX11: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05
-# GFX11: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+
+0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
-# GFX11: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX11: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
index d31e96af4..4c2060a 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt
@@ -1393,10 +1393,16 @@
# GFX12: v_cvt_pk_i16_i32 v255, 0xaf123456, vcc_hi ; encoding: [0xff,0x00,0x24,0xd7,0xff,0xd6,0x00,0x00,0x56,0x34,0x12,0xaf]
0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x12,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00
# GFX12: v_cvt_pk_norm_i16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x12,0xd7,0x01,0x04,0x00,0x00]
@@ -1437,11 +1443,29 @@
0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX12: v_cvt_pk_norm_i16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x12,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x12,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x12,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00
-# GFX12: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.l, v2.l ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x05,0x02,0x00]
0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00
-# GFX12: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.l ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x13,0xd7,0xff,0xff,0x03,0x00]
0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00
# GFX12: v_cvt_pk_norm_u16_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x13,0xd7,0x01,0x04,0x00,0x00]
@@ -1482,6 +1506,18 @@
0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00
# GFX12: v_cvt_pk_norm_u16_f16 v255, -|0xfe0b|, -|vcc_hi| op_sel:[0,1,0] ; encoding: [0xff,0x13,0x13,0xd7,0xff,0xd6,0x00,0x60,0x0b,0xfe,0x00,0x00]
+0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v1, v2 op_sel:[1,0,0] ; encoding: [0x05,0x08,0x13,0xd7,0x01,0x05,0x02,0x00]
+
+0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16 v5, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16 v5, v255, v255 op_sel:[0,1,0] ; encoding: [0x05,0x10,0x13,0xd7,0xff,0xff,0x03,0x00]
+
0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00
# GFX12: v_cvt_pk_u16_f32 v5, v1, v2 ; encoding: [0x05,0x00,0x07,0xd7,0x01,0x05,0x02,0x00]
@@ -1708,49 +1744,118 @@
# GFX12: v_cvt_pk_norm_u16_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x22,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00
-# GFX12: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, v1.l, v2.l, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x54,0xd6,0x01,0x05,0x0e,0x00]
0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01
-# GFX12: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, v255.l, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 ; encoding: [0x05,0x00,0x54,0xd6,0xff,0x05,0xa4,0x01]
0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01
-# GFX12: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.l, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi ; encoding: [0x05,0x00,0x54,0xd6,0x01,0xfe,0xff,0x01]
0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01
-# GFX12: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s105, s105, exec_lo ; encoding: [0x05,0x00,0x54,0xd6,0x69,0xd2,0xf8,0x01]
0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04
-# GFX12: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x54,0xd6,0x6a,0xf6,0x0c,0x04]
0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
-# GFX12: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.l ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 ; encoding: [0x05,0x00,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1
-# GFX12: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W32-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-REAL16: v_div_fixup_f16 v5.l, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
+# W64-FAKE16: v_div_fixup_f16 v5, -|ttmp15|, -|src_scc|, -|ttmp15| ; encoding: [0x05,0x07,0x54,0xd6,0x7b,0xfa,0xed,0xe1]
0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01
-# GFX12: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, m0, 0.5, m0 ; encoding: [0x05,0x00,0x54,0xd6,0x7d,0xe0,0xf5,0x01]
0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01
-# GFX12: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W32-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, |exec_lo|, -1, vcc_hi ; encoding: [0x05,0x01,0x54,0xd6,0x7e,0x82,0xad,0x01]
0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1
-# GFX12: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W32-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-REAL16: v_div_fixup_f16 v5.l, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
+# W64-FAKE16: v_div_fixup_f16 v5, -|exec_hi|, null, -|vcc_lo| ; encoding: [0x05,0x05,0x54,0xd6,0x7f,0xf8,0xa8,0xa1]
0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00
-# GFX12: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.h, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, null, exec_lo, -|0xfe0b| op_sel:[1,1,1,1] ; encoding: [0x05,0x7c,0x54,0xd6,0x7c,0xfc,0xfc,0x83,0x0b,0xfe,0x00,0x00]
0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3
-# GFX12: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W32-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-REAL16: v_div_fixup_f16 v5.l, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
+# W64-FAKE16: v_div_fixup_f16 v5, -1, -|exec_hi|, -|src_scc| op_sel:[1,0,0,0] ; encoding: [0x05,0x0e,0x54,0xd6,0xc1,0xfe,0xf4,0xc3]
0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43
-# GFX12: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W32-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W32-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W64-REAL16: v_div_fixup_f16 v5.l, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
+# W64-FAKE16: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x43]
0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23
-# GFX12: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W32-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W32-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W64-REAL16: v_div_fixup_f16 v5.l, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+# W64-FAKE16: v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0] ; encoding: [0x05,0x22,0x54,0xd6,0xfd,0xd4,0x04,0x23]
+
+0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+
+0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01
+# W32-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, v255.h, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, v255, s2, s105 op_sel:[1,0,0,0] ; encoding: [0x05,0x08,0x54,0xd6,0xff,0x05,0xa4,0x01]
+
+0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01
+# W32-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W32-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-REAL16: v_div_fixup_f16 v5.l, s1, v255.h, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+# W64-FAKE16: v_div_fixup_f16 v5, s1, v255, exec_hi op_sel:[0,1,0,0] ; encoding: [0x05,0x10,0x54,0xd6,0x01,0xfe,0xff,0x01]
+
+0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00
+# W32-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v5.l, vcc_hi, 0xfe0b, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v5, vcc_hi, 0xfe0b, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0x6b,0xfe,0xfd,0x07,0x0b,0xfe,0x00,0x00]
0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
-# GFX12: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16 v255.h, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00
# GFX12: v_div_fixup_f32 v5, v1, v2, s3 ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
index 53c1f2e..c64fe39 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt
@@ -4452,133 +4452,316 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+
+0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff]
0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff]
0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01]
+
+0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+
+0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13]
0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff]
0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff]
0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff]
0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff]
0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff]
0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff
-# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff]
0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01
-# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01]
0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13]
+
+0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+
+0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
+
+0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff]
+
+0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01]
+
+0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13]
0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30
-# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30]
0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index a77b90c..9ed20c7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -2622,73 +2622,196 @@
# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00]
0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x12,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+
+0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x12,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05]
0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x13,0xd7,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+
+0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x13,0xd7,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05]
0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00
-# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xea,0xfe,0x03,0x60,0xff,0x00,0x00,0x00]
0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xee,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05]
0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xaa,0x01,0x01,0x77,0x39,0x05]
0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x54,0xd6,0xe9,0x04,0xf6,0x81,0x01,0x77,0x39,0x05]
0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x02,0x54,0xd6,0xe9,0x04,0xfe,0x41,0x01,0x77,0x39,0x05]
0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x7c,0x54,0xd6,0xe9,0x04,0xfa,0x21,0x01,0x77,0x39,0x05]
0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0b,0x54,0xd6,0xe9,0x04,0xf2,0x61,0x01,0x77,0x39,0x05]
0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x15,0x54,0xd6,0xe9,0x04,0x06,0xa3,0x01,0x77,0x39,0x05]
0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05
-# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x26,0x54,0xd6,0xe9,0x04,0xc2,0xc3,0x01,0x77,0x39,0x05]
+
+0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+
+0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x78,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
+
+0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
+
+0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x0a,0x54,0xd6,0xe9,0x04,0x06,0x23,0x01,0x77,0x39,0x05]
+
+0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05
+# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x13,0x54,0xd6,0xe9,0x04,0xc2,0x63,0x01,0x77,0x39,0x05]
0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00
-# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
+# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xea,0xfe,0xf7,0xe3,0xff,0x00,0x00,0x00]
0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt
index aa3b4c7..310e23f 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx908-dl-insts.txt
@@ -778,291 +778,15 @@
# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
0x00,0x40,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x50,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x48,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x58,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x10,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x08,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_i32_i8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x18,0xa8,0xd3,0x01,0x05,0x0e,0x1c
-
# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
0x00,0x40,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x50,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x48,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x58,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x10,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x08,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot4_u32_u8 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x18,0xa9,0xd3,0x01,0x05,0x0e,0x1c
-
# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
0x00,0x40,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x50,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x48,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x58,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x10,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x08,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_i32_i4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x18,0xaa,0xd3,0x01,0x05,0x0e,0x1c
-
# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 ; encoding: [0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c]
0x00,0x40,0xab,0xd3,0x01,0x05,0x0e,0x1c
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] ; encoding: [0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x50,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x48,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] ; encoding: [0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x58,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[0,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,0,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel_hi:[1,1,0] ; encoding: [0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x00,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[0,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x10,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,0,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x08,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,0,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04]
-0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x04
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[0,1,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14]
-0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x14
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,0,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c]
-0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x0c
-
-# CHECK: v_dot8_u32_u4 v0, v1, v2, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] ; encoding: [0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c]
-0x00,0x18,0xab,0xd3,0x01,0x05,0x0e,0x1c
-
# CHECK: v_dot2_f32_f16 v0, v1, v2, v3 clamp ; encoding: [0x00,0xc0,0xa3,0xd3,0x01,0x05,0x0e,0x1c]
0x00,0xc0,0xa3,0xd3,0x01,0x05,0x0e,0x1c
diff --git a/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
new file mode 100644
index 0000000..e7fbb3c
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-missing-callsite.ll
@@ -0,0 +1,70 @@
+;; Test callsite context graph generation for simple call graph with
+;; two memprof contexts and no inlining, where one callsite required for
+;; cloning is missing (e.g. unmatched).
+;;
+;; Original code looks like:
+;;
+;; char *foo() {
+;; return new char[10];
+;; }
+;;
+;; int main(int argc, char **argv) {
+;; char *x = foo();
+;; char *y = foo();
+;; memset(x, 0, 10);
+;; memset(y, 0, 10);
+;; delete[] x;
+;; sleep(200);
+;; delete[] y;
+;; return 0;
+;; }
+
+; RUN: opt -thinlto-bc -memprof-report-hinted-sizes %s >%t.o
+; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t.o,main,plx \
+; RUN: -r=%t.o,_Znam, \
+; RUN: -memprof-report-hinted-sizes \
+; RUN: -pass-remarks=memprof-context-disambiguation -save-temps \
+; RUN: -o %t.out 2>&1 | FileCheck %s --implicit-check-not "call in clone _Z3foov" \
+; RUN: --check-prefix=SIZESUNHINTED
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not "\"memprof\"=\"cold\""
+
+source_filename = "memprof-missing-callsite.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+entry:
+ ;; Missing callsite metadata blocks cloning
+ %call = call ptr @_Z3foov()
+ %call1 = call ptr @_Z3foov()
+ ret i32 0
+}
+
+define internal ptr @_Z3foov() #0 {
+entry:
+ %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
+ ret ptr null
+}
+
+declare ptr @_Znam(i64)
+
+; uselistorder directives
+uselistorder ptr @_Z3foov, { 1, 0 }
+
+attributes #0 = { noinline optnone }
+
+!2 = !{!3, !5}
+!3 = !{!4, !"notcold", !10}
+!4 = !{i64 9086428284934609951, i64 8632435727821051414}
+!5 = !{!6, !"cold", !11, !12}
+!6 = !{i64 9086428284934609951, i64 -3421689549917153178}
+!7 = !{i64 9086428284934609951}
+!10 = !{i64 123, i64 100}
+!11 = !{i64 456, i64 200}
+!12 = !{i64 789, i64 300}
+
+; SIZESUNHINTED: NotCold full allocation context 123 with total size 100 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 456 with total size 200 is NotColdCold after cloning
+; SIZESUNHINTED: Cold full allocation context 789 with total size 300 is NotColdCold after cloning
diff --git a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
index 49c22bf..a6863cb 100644
--- a/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
+++ b/llvm/test/ThinLTO/X86/memprof-tailcall-nonunique.ll
@@ -20,7 +20,9 @@
; RUN: -stats -debug -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
-; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+;; We should not see any type of cold attribute or cloning applied
+; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --implicit-check-not cold \
+; RUN: --implicit-check-not ".memprof."
;; Try again but with distributed ThinLTO
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
@@ -39,26 +41,23 @@
; RUN: -o %t2.out 2>&1 | FileCheck %s --check-prefix=STATS --check-prefix=DEBUG
;; Run ThinLTO backend
+;; We should not see any type of cold attribute or cloning applied
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-import-summary=%t.o.thinlto.bc \
-; RUN: -stats %t.o -S 2>&1 | FileCheck %s --check-prefix=IR
+; RUN: -stats %t.o -S 2>&1 | FileCheck %s --implicit-check-not cold \
+; RUN: --implicit-check-not ".memprof."
; DEBUG: Not found through unique tail call chain: 17377440600225628772 (_Z3barv) from 15822663052811949562 (main) that actually called 8716735811002003409 (xyz) (found multiple possible chains)
; STATS: 1 memprof-context-disambiguation - Number of profiled callees found via multiple tail call chains
-;; Check that all calls in the IR are to the original functions, leading to a
-;; non-cold operator new call.
-
source_filename = "tailcall-nonunique.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: noinline
-; IR-LABEL: @_Z3barv()
define dso_local ptr @_Z3barv() local_unnamed_addr #0 {
entry:
- ; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !9
ret ptr %call
}
@@ -67,54 +66,43 @@ entry:
declare ptr @_Znam(i64) #1
; Function Attrs: noinline
-; IR-LABEL: @_Z5blah1v()
define dso_local ptr @_Z5blah1v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z5blah2v()
define dso_local ptr @_Z5blah2v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z4baz1v()
define dso_local ptr @_Z4baz1v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z5blah1v()
%call = tail call ptr @_Z5blah1v()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z4baz2v()
define dso_local ptr @_Z4baz2v() local_unnamed_addr #0 {
entry:
- ; IR: call ptr @_Z5blah2v()
%call = tail call ptr @_Z5blah2v()
ret ptr %call
}
; Function Attrs: noinline
-; IR-LABEL: @_Z3foob(i1 %b)
define dso_local ptr @_Z3foob(i1 %b) local_unnamed_addr #0 {
entry:
br i1 %b, label %if.then, label %if.else
if.then: ; preds = %entry
- ; IR: call ptr @_Z4baz1v()
%call = tail call ptr @_Z4baz1v()
br label %return
if.else: ; preds = %entry
- ; IR: call ptr @_Z4baz2v()
%call1 = tail call ptr @_Z4baz2v()
br label %return
@@ -124,29 +112,21 @@ return: ; preds = %if.else, %if.then
}
; Function Attrs: noinline
-; IR-LABEL: @xyz()
define dso_local i32 @xyz() local_unnamed_addr #0 {
delete.end13:
- ; IR: call ptr @_Z3foob(i1 true)
%call = tail call ptr @_Z3foob(i1 true)
- ; IR: call ptr @_Z3foob(i1 true)
%call1 = tail call ptr @_Z3foob(i1 true)
- ; IR: call ptr @_Z3foob(i1 false)
%call2 = tail call ptr @_Z3foob(i1 false)
- ; IR: call ptr @_Z3foob(i1 false)
%call3 = tail call ptr @_Z3foob(i1 false)
ret i32 0
}
define dso_local i32 @main() local_unnamed_addr #0 {
delete.end13:
- ; IR: call i32 @xyz()
%call1 = tail call i32 @xyz(), !callsite !11
ret i32 0
}
-; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
-
attributes #0 = { noinline }
attributes #1 = { nobuiltin allocsize(0) }
attributes #2 = { builtin allocsize(0) }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
index 75cebae..bcb6ebc 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/fix_clone_checking.ll
@@ -7,14 +7,8 @@
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | FileCheck %s
-;; Make sure we created some clones
-; CHECK: created clone A.memprof.1
-; CHECK: created clone C.memprof.1
-; CHECK: created clone D.memprof.1
-; CHECK: created clone E.memprof.1
-; CHECK: created clone B.memprof.1
-; CHECK: created clone F.memprof.1
-; CHECK: created clone G.memprof.1
+;; Make sure we successfully created at least one clone
+; CHECK: created clone {{.*}}.memprof.1
; ModuleID = '<stdin>'
source_filename = "reduced.ll"
diff --git a/llvm/test/Transforms/PGOProfile/memprof-undrift.test b/llvm/test/Transforms/PGOProfile/memprof-undrift.test
new file mode 100644
index 0000000..45fb2f2
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof-undrift.test
@@ -0,0 +1,174 @@
+; REQUIRES: x86_64-linux
+
+; Make sure that we can undrift the MemProf profile and annotate the IR
+; accordingly.
+;
+; The IR was generated from:
+;
+; char *foo() { return ::new char[4]; }
+; char *leaf() { return ::new char[4]; }
+; char *middle() { return leaf(); }
+; char *aaa() { return middle(); }
+; char *bbb() { return middle(); }
+;
+; int main() {
+; foo();
+;
+; char *a = aaa();
+; char *b = bbb();
+; a[0] = 'a';
+; b[0] = 'b';
+; delete[] a;
+; sleep(10);
+; delete[] b;
+;
+; return 0;
+; }
+
+; RUN: split-file %s %t
+; RUN: llvm-profdata merge %t/memprof_undrift.yaml -o %t/memprof_undrift.memprofdata
+; RUN: opt < %t/memprof_undrift.ll -passes='memprof-use<profile-filename=%t/memprof_undrift.memprofdata>' -memprof-salvage-stale-profile -memprof-ave-lifetime-cold-threshold=5 -S 2>&1 | FileCheck %s
+
+;--- memprof_undrift.yaml
+---
+HeapProfileRecords:
+ - GUID: _Z3aaav
+ AllocSites: []
+ CallSites:
+ - - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - GUID: _Z6middlev
+ AllocSites: []
+ CallSites:
+ - - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - GUID: _Z3foov
+ AllocSites:
+ - Callstack:
+ - { Function: _Z3foov, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 10000
+ TotalLifetimeAccessDensity: 0
+ CallSites: []
+ - GUID: _Z4leafv
+ AllocSites:
+ - Callstack:
+ - { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 0
+ TotalLifetimeAccessDensity: 25000
+ - Callstack:
+ - { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ - { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
+ MemInfoBlock:
+ AllocCount: 1
+ TotalSize: 4
+ TotalLifetime: 10000
+ TotalLifetimeAccessDensity: 2
+ CallSites: []
+ - GUID: _Z3bbbv
+ AllocSites: []
+ CallSites:
+ - - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
+...
+;--- memprof_undrift.ll
+define dso_local ptr @_Z3foov() !dbg !5 {
+; CHECK-LABEL: @_Z3foov()
+entry:
+ %call = call ptr @_Znam(i64 4) #1, !dbg !8
+; CHECK: call ptr @_Znam(i64 4) #[[ATTR:[0-9]+]]
+ ret ptr %call, !dbg !9
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare ptr @_Znam(i64 noundef) #0
+
+define dso_local ptr @_Z4leafv() !dbg !10 {
+; CHECK-LABEL: @_Z4leafv()
+entry:
+ %call = call ptr @_Znam(i64 4) #1, !dbg !11
+; CHECK: call ptr @_Znam(i64 4) {{.*}}, !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+ ret ptr %call, !dbg !12
+}
+
+define dso_local ptr @_Z6middlev() !dbg !13 {
+; CHECK-LABEL: @_Z6middlev()
+entry:
+ %call.i = call ptr @_Znam(i64 4) #1, !dbg !14
+; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C2:[0-9]+]]
+ ret ptr %call.i, !dbg !16
+}
+
+define dso_local ptr @_Z3aaav() !dbg !17 {
+; CHECK-LABEL: @_Z3aaav()
+entry:
+ %call.i.i = call ptr @_Znam(i64 4) #1, !dbg !18
+; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C3:[0-9]+]]
+ ret ptr %call.i.i, !dbg !21
+}
+
+define dso_local ptr @_Z3bbbv() !dbg !22 {
+; CHECK-LABEL: @_Z3bbbv()
+entry:
+ %call.i.i = call ptr @_Znam(i64 4) #1, !dbg !23
+; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C4:[0-9]+]]
+ ret ptr %call.i.i, !dbg !26
+}
+
+attributes #0 = { nobuiltin allocsize(0) }
+attributes #1 = { builtin allocsize(0) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
+!1 = !DIFile(filename: "undrift.cc", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{}
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 56, type: !6, unit: !0)
+!6 = !DISubroutineType(types: !7)
+!7 = !{}
+!8 = !DILocation(line: 56, column: 22, scope: !5)
+!9 = !DILocation(line: 56, column: 15, scope: !5)
+!10 = distinct !DISubprogram(name: "leaf", linkageName: "_Z4leafv", scope: !1, file: !1, line: 58, type: !6, unit: !0)
+!11 = !DILocation(line: 58, column: 23, scope: !10)
+!12 = !DILocation(line: 58, column: 16, scope: !10)
+!13 = distinct !DISubprogram(name: "middle", linkageName: "_Z6middlev", scope: !1, file: !1, line: 59, type: !6, unit: !0)
+!14 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !15)
+!15 = distinct !DILocation(line: 59, column: 25, scope: !13)
+!16 = !DILocation(line: 59, column: 18, scope: !13)
+!17 = distinct !DISubprogram(name: "aaa", linkageName: "_Z3aaav", scope: !1, file: !1, line: 61, type: !6, unit: !0)
+!18 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !19)
+!19 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !20)
+!20 = distinct !DILocation(line: 61, column: 22, scope: !17)
+!21 = !DILocation(line: 61, column: 15, scope: !17)
+!22 = distinct !DISubprogram(name: "bbb", linkageName: "_Z3bbbv", scope: !1, file: !1, line: 62, type: !6, unit: !0)
+!23 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !24)
+!24 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !25)
+!25 = distinct !DILocation(line: 62, column: 22, scope: !22)
+!26 = !DILocation(line: 62, column: 15, scope: !22)
+
+; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="cold" }
+
+; CHECK: ![[M1]] = !{![[M1L:[0-9]+]], ![[M1R:[0-9]+]]}
+; CHECK: ![[M1L]] = !{![[M1LL:[0-9]+]], !"cold"}
+; CHECK: ![[M1LL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 7749555980993309703}
+; CHECK: ![[M1R]] = !{![[M1RL:[0-9]+]], !"notcold"}
+; CHECK: ![[M1RL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 -4748707735015301746}
+
+; CHECK: ![[C1]] = !{i64 -7165227774426488445}
+
+; CHECK: ![[C2]] = !{i64 6179674587295384169}
+
+; CHECK: ![[C3]] = !{i64 -4748707735015301746}
+
+; CHECK: ![[C4]] = !{i64 7749555980993309703}
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll b/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll
new file mode 100644
index 0000000..d3995f1
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/slp-deleted-inst.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer < %s | FileCheck %s
+
+define void @foo() {
+; CHECK-LABEL: define void @foo() {
+; CHECK-NEXT: [[BB:.*]]:
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP11:%.*]], %[[BB3:.*]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NEXT: br label %[[BB3]]
+; CHECK: [[BB3]]:
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i32> [[TMP0]] to <2 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = mul <2 x i1> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i1> zeroinitializer, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and <2 x i1> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i32
+; CHECK-NEXT: [[I22:%.*]] = or i32 [[TMP6]], [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[I22]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ult <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11]] = select <2 x i1> [[TMP10]], <2 x i32> zeroinitializer, <2 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[BB1]]
+;
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb3, %bb
+ %i = phi i32 [ %i26, %bb3 ], [ 0, %bb ]
+ %i2 = phi i32 [ %i24, %bb3 ], [ 0, %bb ]
+ br label %bb3
+
+bb3: ; preds = %bb1
+ %i4 = zext i32 %i2 to i64
+ %i5 = mul i64 %i4, 0
+ %i10 = or i64 0, %i5
+ %i11 = trunc i64 %i10 to i32
+ %i12 = and i32 %i11, 0
+ %i13 = zext i32 %i to i64
+ %i14 = mul i64 %i13, 0
+ %i19 = or i64 0, %i14
+ %i20 = trunc i64 %i19 to i32
+ %i21 = and i32 %i20, 0
+ %i22 = or i32 %i12, %i21
+ %i23 = icmp ult i32 %i22, 0
+ %i24 = select i1 %i23, i32 0, i32 0
+ %i25 = icmp ult i32 0, 0
+ %i26 = select i1 %i25, i32 0, i32 0
+ br label %bb1
+}
diff --git a/llvm/test/tools/llvm-cov/branch-macros.cpp b/llvm/test/tools/llvm-cov/branch-macros.cpp
index 73042ac..7f3d1e8 100644
--- a/llvm/test/tools/llvm-cov/branch-macros.cpp
+++ b/llvm/test/tools/llvm-cov/branch-macros.cpp
@@ -5,7 +5,7 @@
#define COND1 (a == b)
#define COND2 (a != b)
#define COND3 (COND1 && COND2)
-#define COND4 (COND3 ? COND2 : COND1)
+#define COND4 (COND3 ? COND2 : COND1) // CHECK: | Branch ([[@LINE]]:15): [True: 1, False: 2]
#define MACRO1 COND3
#define MACRO2 MACRO1
#define MACRO3 MACRO2
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
new file mode 100644
index 0000000..bdc02d4
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-A.s
@@ -0,0 +1,59 @@
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOAND_D -mattr="+a" | FileCheck --check-prefix=AMOAND_D %s
+
+AMOAND_D: ---
+AMOAND_D-NEXT: mode: latency
+AMOAND_D-NEXT: key:
+AMOAND_D-NEXT: instructions:
+AMOAND_D-NEXT: - 'AMOAND_D [[RE01:X[0-9]+]] X10 [[RE01:X[0-9]+]]'
+AMOAND_D-NEXT: config: ''
+AMOAND_D-NEXT: register_initial_values:
+AMOAND_D-NEXT: - '[[RE01:X[0-9]+]]=0x0'
+AMOAND_D-DAG: ...
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOADD_W -mattr="+a" | FileCheck --check-prefix=AMOADD_W %s
+
+AMOADD_W: ---
+AMOADD_W-NEXT: mode: latency
+AMOADD_W-NEXT: key:
+AMOADD_W-NEXT: instructions:
+AMOADD_W-NEXT: - 'AMOADD_W [[RE02:X[0-9]+]] X10 [[RE02:X[0-9]+]]'
+AMOADD_W-NEXT: config: ''
+AMOADD_W-NEXT: register_initial_values:
+AMOADD_W-NEXT: - '[[RE02:X[0-9]+]]=0x0'
+AMOADD_W-DAG: ...
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOMAXU_D -mattr="+a" | FileCheck --check-prefix=AMOMAXU_D %s
+
+AMOMAXU_D: ---
+AMOMAXU_D-NEXT: mode: latency
+AMOMAXU_D-NEXT: key:
+AMOMAXU_D-NEXT: instructions:
+AMOMAXU_D-NEXT: - 'AMOMAXU_D [[RE03:X[0-9]+]] X10 [[RE03:X[0-9]+]]'
+AMOMAXU_D-NEXT: config: ''
+AMOMAXU_D-NEXT: register_initial_values:
+AMOMAXU_D-NEXT: - '[[RE03:X[0-9]+]]=0x0'
+AMOMAXU_D-DAG: ...
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOMIN_W -mattr="+a" | FileCheck --check-prefix=AMOMIN_W %s
+
+AMOMIN_W: ---
+AMOMIN_W-NEXT: mode: latency
+AMOMIN_W-NEXT: key:
+AMOMIN_W-NEXT: instructions:
+AMOMIN_W-NEXT: - 'AMOMIN_W [[RE04:X[0-9]+]] X10 [[RE04:X[0-9]+]]'
+AMOMIN_W-NEXT: config: ''
+AMOMIN_W-NEXT: register_initial_values:
+AMOMIN_W-NEXT: - '[[RE04:X[0-9]+]]=0x0'
+AMOMIN_W-DAG: ...
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=AMOXOR_D -mattr="+a" | FileCheck --check-prefix=AMOXOR_D %s
+
+AMOXOR_D: ---
+AMOXOR_D-NEXT: mode: latency
+AMOXOR_D-NEXT: key:
+AMOXOR_D-NEXT: instructions:
+AMOXOR_D-NEXT: - 'AMOXOR_D [[RE05:X[0-9]+]] X10 [[RE05:X[0-9]+]]'
+AMOXOR_D-NEXT: config: ''
+AMOXOR_D-NEXT: register_initial_values:
+AMOXOR_D-NEXT: - '[[RE05:X[0-9]+]]=0x0'
+AMOXOR_D-DAG: ...
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s
new file mode 100644
index 0000000..9e94f02
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-extension-C.s
@@ -0,0 +1,48 @@
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ADDI -mattr=+c | FileCheck --check-prefix=C_ADDI %s
+
+C_ADDI: ---
+C_ADDI-NEXT: mode: latency
+C_ADDI-NEXT: key:
+C_ADDI-NEXT: instructions:
+C_ADDI-NEXT: - 'C_ADDI [[REG01:X[0-9]+]] [[RE02:X[0-9]+]] [[IMM0:i_0x[0-9]+]]'
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ADDIW -mattr=+c | FileCheck --check-prefix=C_ADDIW %s
+
+C_ADDIW: ---
+C_ADDIW-NEXT: mode: latency
+C_ADDIW-NEXT: key:
+C_ADDIW-NEXT: instructions:
+C_ADDIW-NEXT: - 'C_ADDIW [[REG11:X[0-9]+]] [[RE12:X[0-9]+]] [[IMM1:i_0x[0-9]+]]'
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_ANDI -mattr=+c | FileCheck --check-prefix=C_ANDI %s
+
+C_ANDI: ---
+C_ANDI-NEXT: mode: latency
+C_ANDI-NEXT: key:
+C_ANDI-NEXT: instructions:
+C_ANDI-NEXT: - 'C_ANDI [[REG31:X[0-9]+]] [[REG32:X[0-9]+]] [[IMM3:i_0x[0-9]+]]'
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SLLI -mattr=+c | FileCheck --check-prefix=C_SLLI %s
+
+C_SLLI: ---
+C_SLLI-NEXT: mode: latency
+C_SLLI-NEXT: key:
+C_SLLI-NEXT: instructions:
+C_SLLI-NEXT: - 'C_SLLI [[REG81:X[0-9]+]] [[REG82:X[0-9]+]] [[IMM8:i_0x[0-9]+]]'
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SRAI -mattr=+c | FileCheck --check-prefix=C_SRAI %s
+
+C_SRAI: ---
+C_SRAI-NEXT: mode: latency
+C_SRAI-NEXT: key:
+C_SRAI-NEXT: instructions:
+C_SRAI-NEXT: - 'C_SRAI [[REG91:X[0-9]+]] [[REG92:X[0-9]+]] [[IMM9:i_0x[0-9]+]]'
+
+# RUN: llvm-exegesis -mode=latency -mtriple=riscv64-unknown-linux-gnu --mcpu=generic --benchmark-phase=assemble-measured-code -opcode-name=C_SRLI -mattr=+c | FileCheck --check-prefix=C_SRLI %s
+
+C_SRLI: ---
+C_SRLI-NEXT: mode: latency
+C_SRLI-NEXT: key:
+C_SRLI-NEXT: instructions:
+C_SRLI-NEXT: - 'C_SRLI [[REG101:X[0-9]+]] [[REG102:X[0-9]+]] [[IMM10:i_0x[0-9]+]]'
+C_SRLI-DAG: ...
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s
new file mode 100644
index 0000000..2dea89c
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/latency-by-opcode-name-FADD_D.s
@@ -0,0 +1,11 @@
+# RUN: llvm-exegesis -mtriple=riscv64-unknown-linux-gnu --mcpu=generic -mode=latency --benchmark-phase=assemble-measured-code -mattr=+d -opcode-name=FADD_D | FileCheck %s
+
+CHECK: ---
+CHECK-NEXT: mode: latency
+CHECK-NEXT: key:
+CHECK-NEXT: instructions:
+CHECK-NEXT: - 'FADD_D [[REG1:F[0-9]+_D]] [[REG2:F[0-9]+_D]] [[REG3:F[0-9]+_D]] i_0x7'
+CHECK-NEXT: config: ''
+CHECK-NEXT: register_initial_values:
+CHECK-DAG: - '[[REG1]]=0x0'
+CHECK-DAG: ...
diff --git a/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg b/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg
new file mode 100644
index 0000000..466ccc2
--- /dev/null
+++ b/llvm/test/tools/llvm-exegesis/RISCV/lit.local.cfg
@@ -0,0 +1,3 @@
+if not ("RISCV" in config.root.targets):
+ # We need support for RISCV.
+ config.unsupported = True
diff --git a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt
index 414b49e..d95c37f 100644
--- a/llvm/tools/llvm-exegesis/lib/CMakeLists.txt
+++ b/llvm/tools/llvm-exegesis/lib/CMakeLists.txt
@@ -12,6 +12,9 @@ endif()
if (LLVM_TARGETS_TO_BUILD MATCHES "Mips")
list(APPEND LLVM_EXEGESIS_TARGETS "Mips")
endif()
+if(LLVM_TARGETS_TO_BUILD MATCHES "RISCV")
+ list(APPEND LLVM_EXEGESIS_TARGETS "RISCV")
+endif()
set(LLVM_EXEGESIS_TARGETS ${LLVM_EXEGESIS_TARGETS} PARENT_SCOPE)
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index 9c926d1..c9225e5 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -95,11 +95,12 @@ Instruction::Instruction(const MCInstrDesc *Description, StringRef Name,
const BitVector *ImplDefRegs,
const BitVector *ImplUseRegs,
const BitVector *AllDefRegs,
- const BitVector *AllUseRegs)
+ const BitVector *AllUseRegs,
+ const BitVector *NonMemoryRegs)
: Description(*Description), Name(Name), Operands(std::move(Operands)),
Variables(std::move(Variables)), ImplDefRegs(*ImplDefRegs),
ImplUseRegs(*ImplUseRegs), AllDefRegs(*AllDefRegs),
- AllUseRegs(*AllUseRegs) {}
+ AllUseRegs(*AllUseRegs), NonMemoryRegs(*NonMemoryRegs) {}
std::unique_ptr<Instruction>
Instruction::create(const MCInstrInfo &InstrInfo,
@@ -166,6 +167,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
BitVector ImplUseRegs = RATC.emptyRegisters();
BitVector AllDefRegs = RATC.emptyRegisters();
BitVector AllUseRegs = RATC.emptyRegisters();
+ BitVector NonMemoryRegs = RATC.emptyRegisters();
+
for (const auto &Op : Operands) {
if (Op.isReg()) {
const auto &AliasingBits = Op.getRegisterAliasing().aliasedBits();
@@ -177,6 +180,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
ImplDefRegs |= AliasingBits;
if (Op.isUse() && Op.isImplicit())
ImplUseRegs |= AliasingBits;
+ if (Op.isUse() && !Op.isMemory())
+ NonMemoryRegs |= AliasingBits;
}
}
// Can't use make_unique because constructor is private.
@@ -185,7 +190,8 @@ Instruction::create(const MCInstrInfo &InstrInfo,
std::move(Variables), BVC.getUnique(std::move(ImplDefRegs)),
BVC.getUnique(std::move(ImplUseRegs)),
BVC.getUnique(std::move(AllDefRegs)),
- BVC.getUnique(std::move(AllUseRegs))));
+ BVC.getUnique(std::move(AllUseRegs)),
+ BVC.getUnique(std::move(NonMemoryRegs))));
}
const Operand &Instruction::getPrimaryOperand(const Variable &Var) const {
@@ -240,6 +246,12 @@ bool Instruction::hasAliasingRegisters(
ForbiddenRegisters);
}
+bool Instruction::hasAliasingNotMemoryRegisters(
+ const BitVector &ForbiddenRegisters) const {
+ return anyCommonExcludingForbidden(AllDefRegs, NonMemoryRegs,
+ ForbiddenRegisters);
+}
+
bool Instruction::hasOneUseOrOneDef() const {
return AllDefRegs.count() || AllUseRegs.count();
}
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index f8ebc07..d7712e2 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -133,6 +133,12 @@ struct Instruction {
// aliasing Use and Def registers.
bool hasAliasingRegisters(const BitVector &ForbiddenRegisters) const;
+ // Whether this instruction is self aliasing through some registers.
+ // Repeating this instruction may execute sequentially by picking aliasing
+ // Def and Not Memory Use registers. It may also execute in parallel by
+ // picking non aliasing Def and Not Memory Use registers.
+ bool hasAliasingNotMemoryRegisters(const BitVector &ForbiddenRegisters) const;
+
// Whether this instruction's registers alias with OtherInstr's registers.
bool hasAliasingRegistersThrough(const Instruction &OtherInstr,
const BitVector &ForbiddenRegisters) const;
@@ -160,12 +166,15 @@ struct Instruction {
const BitVector &ImplUseRegs; // The set of aliased implicit use registers.
const BitVector &AllDefRegs; // The set of all aliased def registers.
const BitVector &AllUseRegs; // The set of all aliased use registers.
+ // The set of all aliased not memory use registers.
+ const BitVector &NonMemoryRegs;
+
private:
Instruction(const MCInstrDesc *Description, StringRef Name,
SmallVector<Operand, 8> Operands,
SmallVector<Variable, 4> Variables, const BitVector *ImplDefRegs,
const BitVector *ImplUseRegs, const BitVector *AllDefRegs,
- const BitVector *AllUseRegs);
+ const BitVector *AllUseRegs, const BitVector *NonMemoryRegs);
};
// Instructions are expensive to instantiate. This class provides a cache of
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
new file mode 100644
index 0000000..489ac6d
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/CMakeLists.txt
@@ -0,0 +1,22 @@
+include_directories(
+ ${LLVM_MAIN_SRC_DIR}/lib/Target/RISCV
+ ${LLVM_BINARY_DIR}/lib/Target/RISCV
+)
+
+set(LLVM_LINK_COMPONENTS
+ CodeGen
+ RISCV
+ Exegesis
+ Core
+ Support
+ )
+
+add_llvm_library(LLVMExegesisRISCV
+ DISABLE_LLVM_LINK_LLVM_DYLIB
+ STATIC
+ Target.cpp
+
+ DEPENDS
+ intrinsics_gen
+ RISCVCommonTableGen
+ )
diff --git a/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
new file mode 100644
index 0000000..41d3615
--- /dev/null
+++ b/llvm/tools/llvm-exegesis/lib/RISCV/Target.cpp
@@ -0,0 +1,272 @@
+//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "../Target.h"
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "MCTargetDesc/RISCVMatInt.h"
+#include "RISCVInstrInfo.h"
+
+// include computeAvailableFeatures and computeRequiredFeatures.
+#define GET_AVAILABLE_OPCODE_CHECKER
+#include "RISCVGenInstrInfo.inc"
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+#include <vector>
+
+namespace llvm {
+namespace exegesis {
+
+namespace {
+
+// Stores constant value to a general-purpose (integer) register.
+static std::vector<MCInst> loadIntReg(const MCSubtargetInfo &STI, unsigned Reg,
+ const APInt &Value) {
+ SmallVector<MCInst, 8> MCInstSeq;
+ std::vector<MCInst> MatIntInstrs;
+ MCRegister DestReg = Reg;
+
+ RISCVMatInt::generateMCInstSeq(Value.getSExtValue(), STI, DestReg, MCInstSeq);
+ MatIntInstrs.resize(MCInstSeq.size());
+ std::copy(MCInstSeq.begin(), MCInstSeq.end(), MatIntInstrs.begin());
+
+ return MatIntInstrs;
+}
+
+const unsigned ScratchIntReg = RISCV::X30; // t5
+
+// Stores constant bits to a floating-point register.
+static std::vector<MCInst> loadFPRegBits(const MCSubtargetInfo &STI,
+ unsigned Reg, const APInt &Bits,
+ unsigned FmvOpcode) {
+ std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
+ Instrs.push_back(MCInstBuilder(FmvOpcode).addReg(Reg).addReg(ScratchIntReg));
+ return Instrs;
+}
+
+// main idea is:
+// we support APInt only if (represented as double) it has zero fractional
+// part: 1.0, 2.0, 3.0, etc... then we can do the trick: write int to tmp reg t5
+// and then do FCVT this is only reliable thing in 32-bit mode, otherwise we
+// need to use __floatsidf
+static std::vector<MCInst> loadFP64RegBits32(const MCSubtargetInfo &STI,
+ unsigned Reg, const APInt &Bits) {
+ double D = Bits.bitsToDouble();
+ double IPart;
+ double FPart = std::modf(D, &IPart);
+
+ if (std::abs(FPart) > std::numeric_limits<double>::epsilon()) {
+ errs() << "loadFP64RegBits32 is not implemented for doubles like " << D
+ << ", please remove fractional part\n";
+ return {};
+ }
+
+ std::vector<MCInst> Instrs = loadIntReg(STI, ScratchIntReg, Bits);
+ Instrs.push_back(
+ MCInstBuilder(RISCV::FCVT_D_W).addReg(Reg).addReg(ScratchIntReg));
+ return Instrs;
+}
+
+static MCInst nop() {
+ // ADDI X0, X0, 0
+ return MCInstBuilder(RISCV::ADDI)
+ .addReg(RISCV::X0)
+ .addReg(RISCV::X0)
+ .addImm(0);
+}
+
+static bool isVectorRegList(unsigned Reg) {
+ return RISCV::VRM2RegClass.contains(Reg) ||
+ RISCV::VRM4RegClass.contains(Reg) ||
+ RISCV::VRM8RegClass.contains(Reg) ||
+ RISCV::VRN2M1RegClass.contains(Reg) ||
+ RISCV::VRN2M2RegClass.contains(Reg) ||
+ RISCV::VRN2M4RegClass.contains(Reg) ||
+ RISCV::VRN3M1RegClass.contains(Reg) ||
+ RISCV::VRN3M2RegClass.contains(Reg) ||
+ RISCV::VRN4M1RegClass.contains(Reg) ||
+ RISCV::VRN4M2RegClass.contains(Reg) ||
+ RISCV::VRN5M1RegClass.contains(Reg) ||
+ RISCV::VRN6M1RegClass.contains(Reg) ||
+ RISCV::VRN7M1RegClass.contains(Reg) ||
+ RISCV::VRN8M1RegClass.contains(Reg);
+}
+
+class ExegesisRISCVTarget : public ExegesisTarget {
+public:
+ ExegesisRISCVTarget();
+
+ bool matchesArch(Triple::ArchType Arch) const override;
+
+ std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
+ const APInt &Value) const override;
+
+ unsigned getDefaultLoopCounterRegister(const Triple &) const override;
+
+ void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
+ MachineBasicBlock &TargetMBB,
+ const MCInstrInfo &MII,
+ unsigned LoopRegister) const override;
+
+ unsigned getScratchMemoryRegister(const Triple &TT) const override;
+
+ void fillMemoryOperands(InstructionTemplate &IT, unsigned Reg,
+ unsigned Offset) const override;
+
+ ArrayRef<unsigned> getUnavailableRegisters() const override;
+
+ Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
+ MCOperand &AssignedValue,
+ const BitVector &ForbiddenRegs) const override;
+
+ std::vector<InstructionTemplate>
+ generateInstructionVariants(const Instruction &Instr,
+ unsigned MaxConfigsPerOpcode) const override;
+};
+
+ExegesisRISCVTarget::ExegesisRISCVTarget()
+ : ExegesisTarget(ArrayRef<CpuAndPfmCounters>{},
+ RISCV_MC::isOpcodeAvailable) {}
+
+bool ExegesisRISCVTarget::matchesArch(Triple::ArchType Arch) const {
+ return Arch == Triple::riscv32 || Arch == Triple::riscv64;
+}
+
+std::vector<MCInst> ExegesisRISCVTarget::setRegTo(const MCSubtargetInfo &STI,
+ unsigned Reg,
+ const APInt &Value) const {
+ if (RISCV::GPRRegClass.contains(Reg))
+ return loadIntReg(STI, Reg, Value);
+ if (RISCV::FPR16RegClass.contains(Reg))
+ return loadFPRegBits(STI, Reg, Value, RISCV::FMV_H_X);
+ if (RISCV::FPR32RegClass.contains(Reg))
+ return loadFPRegBits(STI, Reg, Value, RISCV::FMV_W_X);
+ if (RISCV::FPR64RegClass.contains(Reg)) {
+ if (STI.hasFeature(RISCV::Feature64Bit))
+ return loadFPRegBits(STI, Reg, Value, RISCV::FMV_D_X);
+ return loadFP64RegBits32(STI, Reg, Value);
+ }
+ if (Reg == RISCV::FRM || Reg == RISCV::VL || Reg == RISCV::VLENB ||
+ Reg == RISCV::VTYPE || RISCV::GPRPairRegClass.contains(Reg) ||
+ RISCV::VRRegClass.contains(Reg) || isVectorRegList(Reg)) {
+ // Don't initialize:
+ // - FRM
+ // - VL, VLENB, VTYPE
+ // - vector registers (and vector register lists)
+ // - Zfinx registers
+ // Generate 'NOP' so that exegesis treats such registers as initialized
+ // (it tries to initialize them with '0' anyway).
+ return {nop()};
+ }
+ errs() << "setRegTo is not implemented for Reg " << Reg
+ << ", results will be unreliable\n";
+ return {};
+}
+
+const unsigned DefaultLoopCounterReg = RISCV::X31; // t6
+const unsigned ScratchMemoryReg = RISCV::X10; // a0
+
+unsigned
+ExegesisRISCVTarget::getDefaultLoopCounterRegister(const Triple &) const {
+ return DefaultLoopCounterReg;
+}
+
+void ExegesisRISCVTarget::decrementLoopCounterAndJump(
+ MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
+ const MCInstrInfo &MII, unsigned LoopRegister) const {
+ BuildMI(&MBB, DebugLoc(), MII.get(RISCV::ADDI))
+ .addDef(LoopRegister)
+ .addUse(LoopRegister)
+ .addImm(-1);
+ BuildMI(&MBB, DebugLoc(), MII.get(RISCV::BNE))
+ .addUse(LoopRegister)
+ .addUse(RISCV::X0)
+ .addMBB(&TargetMBB);
+}
+
+unsigned ExegesisRISCVTarget::getScratchMemoryRegister(const Triple &TT) const {
+ return ScratchMemoryReg; // a0
+}
+
+void ExegesisRISCVTarget::fillMemoryOperands(InstructionTemplate &IT,
+ unsigned Reg,
+ unsigned Offset) const {
+ // TODO: for now we ignore Offset because have no way
+ // to detect it in instruction.
+ auto &I = IT.getInstr();
+
+ auto MemOpIt =
+ find_if(I.Operands, [](const Operand &Op) { return Op.isMemory(); });
+ assert(MemOpIt != I.Operands.end() &&
+ "Instruction must have memory operands");
+
+ const Operand &MemOp = *MemOpIt;
+
+ assert(MemOp.isReg() && "Memory operand expected to be register");
+
+ IT.getValueFor(MemOp) = MCOperand::createReg(Reg);
+}
+
+const unsigned UnavailableRegisters[4] = {RISCV::X0, DefaultLoopCounterReg,
+ ScratchIntReg, ScratchMemoryReg};
+
+ArrayRef<unsigned> ExegesisRISCVTarget::getUnavailableRegisters() const {
+ return UnavailableRegisters;
+}
+
+Error ExegesisRISCVTarget::randomizeTargetMCOperand(
+ const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
+ const BitVector &ForbiddenRegs) const {
+ uint8_t OperandType =
+ Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType;
+
+ switch (OperandType) {
+ case RISCVOp::OPERAND_FRMARG:
+ AssignedValue = MCOperand::createImm(RISCVFPRndMode::DYN);
+ break;
+ case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
+ AssignedValue = MCOperand::createImm(0b1 << 4);
+ break;
+ case RISCVOp::OPERAND_SIMM6_NONZERO:
+ case RISCVOp::OPERAND_UIMMLOG2XLEN_NONZERO:
+ AssignedValue = MCOperand::createImm(1);
+ break;
+ default:
+ if (OperandType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
+ OperandType <= RISCVOp::OPERAND_LAST_RISCV_IMM)
+ AssignedValue = MCOperand::createImm(0);
+ }
+ return Error::success();
+}
+
+std::vector<InstructionTemplate>
+ExegesisRISCVTarget::generateInstructionVariants(
+ const Instruction &Instr, unsigned int MaxConfigsPerOpcode) const {
+ InstructionTemplate IT{&Instr};
+ for (const Operand &Op : Instr.Operands)
+ if (Op.isMemory()) {
+ IT.getValueFor(Op) = MCOperand::createReg(ScratchMemoryReg);
+ }
+ return {IT};
+}
+
+} // anonymous namespace
+
+static ExegesisTarget *getTheRISCVExegesisTarget() {
+ static ExegesisRISCVTarget Target;
+ return &Target;
+}
+
+void InitializeRISCVExegesisTarget() {
+ ExegesisTarget::registerTarget(getTheRISCVExegesisTarget());
+}
+
+} // namespace exegesis
+} // namespace llvm
diff --git a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
index 7100b51..25cdf1c 100644
--- a/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SerialSnippetGenerator.cpp
@@ -53,13 +53,6 @@ computeAliasingInstructions(const LLVMState &State, const Instruction *Instr,
if (OtherOpcode == Instr->Description.getOpcode())
continue;
const Instruction &OtherInstr = State.getIC().getInstr(OtherOpcode);
- const MCInstrDesc &OtherInstrDesc = OtherInstr.Description;
- // Ignore instructions that we cannot run.
- if (OtherInstrDesc.isPseudo() || OtherInstrDesc.usesCustomInsertionHook() ||
- OtherInstrDesc.isBranch() || OtherInstrDesc.isIndirectBranch() ||
- OtherInstrDesc.isCall() || OtherInstrDesc.isReturn()) {
- continue;
- }
if (OtherInstr.hasMemoryOperands())
continue;
if (!ET.allowAsBackToBack(OtherInstr))
@@ -81,12 +74,10 @@ static ExecutionMode getExecutionModes(const Instruction &Instr,
EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
if (Instr.hasMemoryOperands())
EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
- else {
- if (Instr.hasAliasingRegisters(ForbiddenRegisters))
- EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
- if (Instr.hasOneUseOrOneDef())
- EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
- }
+ if (Instr.hasAliasingNotMemoryRegisters(ForbiddenRegisters))
+ EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
+ if (Instr.hasOneUseOrOneDef())
+ EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
return EM;
}
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index 7dcff60..48357d4 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -73,6 +73,9 @@ Error SnippetGenerator::generateConfigurations(
for (CodeTemplate &CT : Templates) {
// TODO: Generate as many BenchmarkCode as needed.
{
+ CT.ScratchSpacePointerInReg =
+ State.getExegesisTarget().getScratchMemoryRegister(
+ State.getTargetMachine().getTargetTriple());
BenchmarkCode BC;
BC.Info = CT.Info;
BC.Key.Instructions.reserve(CT.Instructions.size());
@@ -108,6 +111,12 @@ std::vector<RegisterValue> SnippetGenerator::computeRegisterInitialValues(
// Loop invariant: DefinedRegs[i] is true iif it has been set at least once
// before the current instruction.
BitVector DefinedRegs = State.getRATC().emptyRegisters();
+ // If target always expects a scratch memory register as live input,
+ // mark it as defined.
+ const ExegesisTarget &Target = State.getExegesisTarget();
+ unsigned ScratchMemoryReg = Target.getScratchMemoryRegister(
+ State.getTargetMachine().getTargetTriple());
+ DefinedRegs.set(ScratchMemoryReg);
std::vector<RegisterValue> RIV;
for (const InstructionTemplate &IT : Instructions) {
// Returns the register that this Operand sets or uses, or 0 if this is not
@@ -200,7 +209,8 @@ static void setRegisterOperandValue(const RegisterOperandAssignment &ROV,
if (ROV.Op->isExplicit()) {
auto &AssignedValue = IB.getValueFor(*ROV.Op);
if (AssignedValue.isValid()) {
- assert(AssignedValue.isReg() && AssignedValue.getReg() == ROV.Reg);
+ // TODO don't re-assign register operands which are already "locked"
+ // by Target in corresponding InstructionTemplate
return;
}
AssignedValue = MCOperand::createReg(ROV.Reg);
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 546ec770..fa37e05 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -274,6 +274,10 @@ static cl::opt<int> BenchmarkProcessCPU(
cl::desc("The CPU number that the benchmarking process should executon on"),
cl::cat(BenchmarkOptions), cl::init(-1));
+static cl::opt<std::string> MAttr(
+ "mattr", cl::desc("comma-separated list of target architecture features"),
+ cl::value_desc("+feature1,-feature2,..."), cl::cat(Options), cl::init(""));
+
static ExitOnError ExitOnErr("llvm-exegesis error: ");
// Helper function that logs the error(s) and exits.
@@ -296,6 +300,18 @@ T ExitOnFileError(const Twine &FileName, Expected<T> &&E) {
return std::move(*E);
}
+static const char *getIgnoredOpcodeReasonOrNull(const LLVMState &State,
+ unsigned Opcode) {
+ const MCInstrDesc &InstrDesc = State.getIC().getInstr(Opcode).Description;
+ if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
+ return "Unsupported opcode: isPseudo/usesCustomInserter";
+ if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
+ return "Unsupported opcode: isBranch/isIndirectBranch";
+ if (InstrDesc.isCall() || InstrDesc.isReturn())
+ return "Unsupported opcode: isCall/isReturn";
+ return nullptr;
+}
+
// Checks that only one of OpcodeNames, OpcodeIndex or SnippetsFile is provided,
// and returns the opcode indices or {} if snippets should be read from
// `SnippetsFile`.
@@ -334,6 +350,7 @@ static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
return I->getSecond();
return 0u;
};
+
SmallVector<StringRef, 2> Pieces;
StringRef(OpcodeNames.getValue())
.split(Pieces, ",", /* MaxSplit */ -1, /* KeepEmpty */ false);
@@ -352,17 +369,11 @@ static std::vector<unsigned> getOpcodesOrDie(const LLVMState &State) {
static Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode,
const BitVector &ForbiddenRegs) {
- const Instruction &Instr = State.getIC().getInstr(Opcode);
- const MCInstrDesc &InstrDesc = Instr.Description;
// Ignore instructions that we cannot run.
- if (InstrDesc.isPseudo() || InstrDesc.usesCustomInsertionHook())
- return make_error<Failure>(
- "Unsupported opcode: isPseudo/usesCustomInserter");
- if (InstrDesc.isBranch() || InstrDesc.isIndirectBranch())
- return make_error<Failure>("Unsupported opcode: isBranch/isIndirectBranch");
- if (InstrDesc.isCall() || InstrDesc.isReturn())
- return make_error<Failure>("Unsupported opcode: isCall/isReturn");
+ if (const char *Reason = getIgnoredOpcodeReasonOrNull(State, Opcode))
+ return make_error<Failure>(Reason);
+ const Instruction &Instr = State.getIC().getInstr(Opcode);
const std::vector<InstructionTemplate> InstructionVariants =
State.getExegesisTarget().generateInstructionVariants(
Instr, MaxConfigsPerOpcode);
@@ -485,8 +496,8 @@ void benchmarkMain() {
LLVMInitialize##TargetName##AsmParser();
#include "llvm/Config/TargetExegesis.def"
- const LLVMState State =
- ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters));
+ const LLVMState State = ExitOnErr(
+ LLVMState::Create(TripleName, MCPU, MAttr, UseDummyPerfCounters));
// Preliminary check to ensure features needed for requested
// benchmark mode are present on target CPU and/or OS.
diff --git a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
index 55cbe83..847cca7 100644
--- a/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPDomTreeTest.cpp
@@ -40,13 +40,12 @@ TEST(VPDominatorTreeTest, DominanceNoRegionsTest) {
VPBlockUtils::connectBlocks(VPBB2, VPBB4);
VPBlockUtils::connectBlocks(VPBB3, VPBB4);
- auto TC = std::make_unique<VPValue>();
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB0);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -123,11 +122,10 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
VPBlockUtils::connectBlocks(R2BB1, R2BB2);
VPBlockUtils::connectBlocks(R1, R2);
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB0);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -206,11 +204,10 @@ TEST(VPDominatorTreeTest, DominanceRegionsTest) {
VPBasicBlock *VPBB2 = new VPBasicBlock("VPBB2");
VPBlockUtils::connectBlocks(R1, VPBB2);
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index 97a1f81..6402d6f 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -257,11 +257,10 @@ TEST(VPBasicBlockTest, getPlan) {
VPBlockUtils::connectBlocks(VPBB2, VPBB4);
VPBlockUtils::connectBlocks(VPBB3, VPBB4);
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, VPBB2->getPlan());
@@ -280,11 +279,10 @@ TEST(VPBasicBlockTest, getPlan) {
VPBasicBlock *VPBB1 = new VPBasicBlock();
VPBlockUtils::connectBlocks(VPBB1, R1);
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -313,11 +311,10 @@ TEST(VPBasicBlockTest, getPlan) {
VPBlockUtils::connectBlocks(R1, VPBB2);
VPBlockUtils::connectBlocks(R2, VPBB2);
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
EXPECT_EQ(&Plan, VPBB1->getPlan());
EXPECT_EQ(&Plan, R1->getPlan());
@@ -360,11 +357,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
EXPECT_EQ(VPBB2, FromIterator[1]);
// Use Plan to properly clean up created blocks.
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB4, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
}
{
@@ -463,11 +459,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
EXPECT_EQ(R1, FromIterator[7]);
// Use Plan to properly clean up created blocks.
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB0);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
}
{
@@ -549,11 +544,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
EXPECT_EQ(VPBB1, FromIterator[9]);
// Use Plan to properly clean up created blocks.
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
}
{
@@ -600,11 +594,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
EXPECT_EQ(VPBB1, FromIterator[4]);
// Use Plan to properly clean up created blocks.
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
}
{
@@ -695,11 +688,10 @@ TEST(VPBasicBlockTest, TraversingIteratorTest) {
EXPECT_EQ(VPBB1, FromIterator[3]);
// Use Plan to properly clean up created blocks.
- auto TC = std::make_unique<VPValue>();
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(VPBB2, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
}
delete ScalarHeader;
}
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
index 570d202..6448153 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
@@ -24,7 +24,6 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPBB1->appendRecipe(UseI);
VPBB1->appendRecipe(DefI);
- auto TC = std::make_unique<VPValue>();
VPBasicBlock *VPBB2 = new VPBasicBlock();
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
@@ -34,7 +33,7 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefSameBB) {
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -66,13 +65,12 @@ TEST(VPVerifierTest, VPInstructionUseBeforeDefDifferentBB) {
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
- auto TC = std::make_unique<VPValue>();
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -115,12 +113,11 @@ TEST(VPVerifierTest, VPBlendUseBeforeDefDifferentBB) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
- auto TC = std::make_unique<VPValue>();
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -156,13 +153,12 @@ TEST(VPVerifierTest, DuplicateSuccessorsOutsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBlockUtils::connectBlocks(VPBB1, R1);
- auto TC = std::make_unique<VPValue>();
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -199,13 +195,12 @@ TEST(VPVerifierTest, DuplicateSuccessorsInsideRegion) {
VPBlockUtils::connectBlocks(VPBB1, R1);
VPBB3->setParent(R1);
- auto TC = std::make_unique<VPValue>();
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
#if GTEST_HAS_STREAM_REDIRECTION
::testing::internal::CaptureStderr();
@@ -233,13 +228,12 @@ TEST(VPVerifierTest, BlockOutsideRegionWithParent) {
VPRegionBlock *R1 = new VPRegionBlock(VPBB2, VPBB2, "R1");
VPBlockUtils::connectBlocks(VPBB1, R1);
- auto TC = std::make_unique<VPValue>();
LLVMContext C;
auto *ScalarHeader = BasicBlock::Create(C, "");
VPIRBasicBlock *ScalarHeaderVPBB = new VPIRBasicBlock(ScalarHeader);
VPBlockUtils::connectBlocks(R1, ScalarHeaderVPBB);
VPBlockUtils::connectBlocks(VPPH, VPBB1);
- VPlan Plan(VPPH, &*TC, ScalarHeaderVPBB);
+ VPlan Plan(VPPH, ScalarHeaderVPBB);
VPBB1->setParent(R1);
#if GTEST_HAS_STREAM_REDIRECTION
diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
index b528654..c0b1c45 100644
--- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
@@ -33,6 +33,7 @@ static_library("Common") {
]
sources = [
"Args.cpp",
+ "BPSectionOrdererBase.cpp",
"CommonLinkerContext.cpp",
"DWARF.cpp",
"DriverDispatcher.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
index 274f5b5..6552645 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Passes/BUILD.gn
@@ -21,6 +21,7 @@ static_library("Passes") {
]
sources = [
"CodeGenPassBuilder.cpp",
+ "DroppedVariableStats.cpp",
"OptimizationLevel.cpp",
"PassBuilder.cpp",
"PassBuilderBindings.cpp",
diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
index e4ca566..0f34231 100644
--- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn
@@ -28,7 +28,7 @@ unittest("IRTests") {
"DemandedBitsTest.cpp",
"DominatorTreeBatchUpdatesTest.cpp",
"DominatorTreeTest.cpp",
- "DroppedVariableStatsTest.cpp",
+ "DroppedVariableStatsIRTest.cpp",
"FunctionTest.cpp",
"IRBuilderTest.cpp",
"InstructionsTest.cpp",
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 4866e31..983f7a2 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -459,7 +459,8 @@ public:
/// Starting from `value`, follow the use-def chain in reverse, always
/// selecting the aliasing OpOperands. Find and return Values for which
/// `condition` evaluates to true. OpOperands of such matching Values are not
- /// traversed any further.
+ /// traversed any further, the visited aliasing opOperands will be preserved
+ /// through `visitedOpOperands`.
///
/// When reaching the end of a chain, also return the last Value of that
/// chain if `config.alwaysIncludeLeaves` is set.
@@ -484,7 +485,8 @@ public:
/// `config`.
SetVector<Value> findValueInReverseUseDefChain(
Value value, llvm::function_ref<bool(Value)> condition,
- TraversalConfig config = TraversalConfig()) const;
+ TraversalConfig config = TraversalConfig(),
+ llvm::DenseSet<OpOperand *> *visitedOpOperands = nullptr) const;
/// Find the values that may define the contents of the given value at
/// runtime. A block argument is always a definition. An OpResult is a
diff --git a/mlir/include/mlir/IR/Constraints.td b/mlir/include/mlir/IR/Constraints.td
index 13223aa..62bc84c 100644
--- a/mlir/include/mlir/IR/Constraints.td
+++ b/mlir/include/mlir/IR/Constraints.td
@@ -93,6 +93,12 @@ class Or<list<Pred> children> : CombinedPred<PredCombinerOr, children>;
// A predicate that holds if its child does not.
class Neg<Pred child> : CombinedPred<PredCombinerNot, [child]>;
+// A predicate that is always true.
+defvar TruePred = And<[]>;
+
+// A predicate that is always false.
+defvar False = Or<[]>;
+
// A predicate that substitutes "pat" with "repl" in predicate calls of the
// leaves of the predicate tree (i.e., not CombinedPred).
//
diff --git a/mlir/include/mlir/IR/Properties.td b/mlir/include/mlir/IR/Properties.td
index 0becf7d..df630ad 100644
--- a/mlir/include/mlir/IR/Properties.td
+++ b/mlir/include/mlir/IR/Properties.td
@@ -13,6 +13,8 @@
#ifndef PROPERTIES
#define PROPERTIES
+include "mlir/IR/Constraints.td"
+
// Base class for defining properties.
class Property<string storageTypeParam = "", string desc = ""> {
// User-readable one line summary used in error reporting messages. If empty,
@@ -63,6 +65,13 @@ class Property<string storageTypeParam = "", string desc = ""> {
return convertFromAttribute($_storage, $_attr, $_diag);
}];
+ // The verification predicate for this property. Defaults to the true predicate,
+ // since properties are always their expected type.
+ // Within the predicate, `$_self` is an instance of the **interface**
+ // type of the property. Setting this field to ? will also result in a
+ // true predicate but is not recommended, as it breaks composability.
+ Pred predicate = TruePred;
+
// The call expression to hash the property.
//
// Format:
@@ -150,8 +159,8 @@ class Property<string storageTypeParam = "", string desc = ""> {
return ::mlir::failure();
}];
- // Base definition for the property. (Will be) used for `OptionalProperty` and
- // such cases, analogously to `baseAttr`.
+ // Base definition for the property. Used to look through `OptionalProperty`
+ // for some format generation, as with the `baseAttr` field on attributes.
Property baseProperty = ?;
// Default value for the property within its storage. This should be an expression
@@ -224,8 +233,7 @@ def I64Property : IntProperty<"int64_t">;
class EnumProperty<string storageTypeParam, string desc = "", string default = ""> :
Property<storageTypeParam, desc> {
- // TODO: take advantage of EnumAttrInfo and the like to make this share nice
- // parsing code with EnumAttr.
+ // TODO: implement predicate for enum validity.
let writeToMlirBytecode = [{
$_writer.writeVarInt(static_cast<uint64_t>($_storage));
}];
@@ -331,6 +339,59 @@ def UnitProperty : Property<"bool", "unit property"> {
}
//===----------------------------------------------------------------------===//
+// Property field overwrites
+
+/// Class for giving a property a default value.
+/// This doesn't change anything about the property other than giving it a default
+/// which can be used by ODS to elide printing.
+class DefaultValuedProperty<Property p, string default = "", string storageDefault = ""> : Property<p.storageType, p.summary> {
+ let defaultValue = default;
+ let storageTypeValueOverride = storageDefault;
+ let baseProperty = p;
+ // Keep up to date with `Property` above.
+ let summary = p.summary;
+ let description = p.description;
+ let storageType = p.storageType;
+ let interfaceType = p.interfaceType;
+ let convertFromStorage = p.convertFromStorage;
+ let assignToStorage = p.assignToStorage;
+ let convertToAttribute = p.convertToAttribute;
+ let convertFromAttribute = p.convertFromAttribute;
+ let predicate = p.predicate;
+ let hashProperty = p.hashProperty;
+ let parser = p.parser;
+ let optionalParser = p.optionalParser;
+ let printer = p.printer;
+ let readFromMlirBytecode = p.readFromMlirBytecode;
+ let writeToMlirBytecode = p.writeToMlirBytecode;
+}
+
+/// Apply the predicate `pred` to the property `p`, ANDing it with any
+/// predicates it may already have. If `newSummary` is provided, replace the
+/// summary of `p` with `newSummary`.
+class ConfinedProperty<Property p, Pred pred, string newSummary = "">
+ : Property<p.storageType, !if(!empty(newSummary), p.summary, newSummary)> {
+ let predicate = !if(!ne(p.predicate, TruePred), And<[p.predicate, pred]>, pred);
+ let baseProperty = p;
+ // Keep up to date with `Property` above.
+ let description = p.description;
+ let storageType = p.storageType;
+ let interfaceType = p.interfaceType;
+ let convertFromStorage = p.convertFromStorage;
+ let assignToStorage = p.assignToStorage;
+ let convertToAttribute = p.convertToAttribute;
+ let convertFromAttribute = p.convertFromAttribute;
+ let hashProperty = p.hashProperty;
+ let parser = p.parser;
+ let optionalParser = p.optionalParser;
+ let printer = p.printer;
+ let readFromMlirBytecode = p.readFromMlirBytecode;
+ let writeToMlirBytecode = p.writeToMlirBytecode;
+ let defaultValue = p.defaultValue;
+ let storageTypeValueOverride = p.storageTypeValueOverride;
+}
+
+//===----------------------------------------------------------------------===//
// Primitive property combinators
/// Create a variable named `name` of `prop`'s storage type that is initialized
@@ -342,14 +403,35 @@ class _makePropStorage<Property prop, string name> {
true : "") # ";";
}
+/// Construct a `Pred`icate `ret` that wraps the predicate of the underlying
+/// property `childProp` with:
+///
+/// [](childProp.storageType& s) {
+/// return [](childProp.interfaceType i) {
+/// return leafSubst(childProp.predicate, "$_self" to "i");
+/// }(childProp.convertFromStorage(s))
+/// }
+///
+/// and then appends `prefix` and `suffix`.
+class _makeStorageWrapperPred<Property wrappedProp> {
+ Pred ret =
+ Concat<
+ "[](" # "const " # wrappedProp.storageType
+ # "& baseStore) -> bool { return []("
+ # wrappedProp.interfaceType # " baseIface) -> bool { return (",
+ SubstLeaves<"$_self", "baseIface", wrappedProp.predicate>,
+ "); }(" # !subst("$_storage", "baseStore", wrappedProp.convertFromStorage)
+ # "); }">;
+}
+
/// The generic class for arrays of some other property, which is stored as a
/// `SmallVector` of that property. This uses an `ArrayAttr` as its attribute form
/// though subclasses can override this, as is the case with IntArrayAttr below.
/// Those wishing to use a non-default number of SmallVector elements should
/// subclass `ArrayProperty`.
-class ArrayProperty<Property elem = Property<>, string desc = ""> :
- Property<"::llvm::SmallVector<" # elem.storageType # ">", desc> {
- let summary = "array of " # elem.summary;
+class ArrayProperty<Property elem = Property<>, string newSummary = ""> :
+ Property<"::llvm::SmallVector<" # elem.storageType # ">",
+ !if(!empty(newSummary), "array of " # elem.summary, newSummary)> {
let interfaceType = "::llvm::ArrayRef<" # elem.storageType # ">";
let convertFromStorage = "::llvm::ArrayRef<" # elem.storageType # ">{$_storage}";
let assignToStorage = "$_storage.assign($_value.begin(), $_value.end())";
@@ -382,6 +464,10 @@ class ArrayProperty<Property elem = Property<>, string desc = ""> :
return ::mlir::ArrayAttr::get($_ctxt, elems);
}];
+ let predicate = !if(!eq(elem.predicate, TruePred),
+ TruePred,
+ Concat<"::llvm::all_of($_self, ", _makeStorageWrapperPred<elem>.ret, ")">);
+
defvar theParserBegin = [{
auto& storage = $_storage;
auto parseElemFn = [&]() -> ::mlir::ParseResult {
@@ -463,8 +549,8 @@ class ArrayProperty<Property elem = Property<>, string desc = ""> :
}]);
}
-class IntArrayProperty<string storageTypeParam = "", string desc = ""> :
- ArrayProperty<IntProperty<storageTypeParam, desc>> {
+class IntArrayProperty<Property elem, string newSummary=""> :
+ ArrayProperty<elem, newSummary> {
// Bring back the trivial conversions we don't get in the general case.
let convertFromAttribute = [{
return convertFromAttribute($_storage, $_attr, $_diag);
@@ -474,30 +560,6 @@ class IntArrayProperty<string storageTypeParam = "", string desc = ""> :
}];
}
-/// Class for giving a property a default value.
-/// This doesn't change anything about the property other than giving it a default
-/// which can be used by ODS to elide printing.
-class DefaultValuedProperty<Property p, string default = "", string storageDefault = ""> : Property<p.storageType, p.summary> {
- let defaultValue = default;
- let storageTypeValueOverride = storageDefault;
- let baseProperty = p;
- // Keep up to date with `Property` above.
- let summary = p.summary;
- let description = p.description;
- let storageType = p.storageType;
- let interfaceType = p.interfaceType;
- let convertFromStorage = p.convertFromStorage;
- let assignToStorage = p.assignToStorage;
- let convertToAttribute = p.convertToAttribute;
- let convertFromAttribute = p.convertFromAttribute;
- let hashProperty = p.hashProperty;
- let parser = p.parser;
- let optionalParser = p.optionalParser;
- let printer = p.printer;
- let readFromMlirBytecode = p.readFromMlirBytecode;
- let writeToMlirBytecode = p.writeToMlirBytecode;
-}
-
/// An optional property, stored as an std::optional<p.storageType>
/// interfaced with as an std::optional<p.interfaceType>..
/// The syntax is `none` (or empty string if elided) for an absent value or
@@ -575,6 +637,11 @@ class OptionalProperty<Property p, bit canDelegateParsing = 1>
return ::mlir::ArrayAttr::get($_ctxt, {attr});
}];
+ let predicate = !if(!ne(p.predicate, TruePred),
+ Or<[CPred<"!$_self.has_value()">,
+ SubstLeaves<"$_self", "(*($_self))", p.predicate>]>,
+ TruePred);
+
defvar delegatedParserBegin = [{
if (::mlir::succeeded($_parser.parseOptionalKeyword("none"))) {
$_storage = std::nullopt;
diff --git a/mlir/include/mlir/TableGen/Property.h b/mlir/include/mlir/TableGen/Property.h
index 702e675..3861591 100644
--- a/mlir/include/mlir/TableGen/Property.h
+++ b/mlir/include/mlir/TableGen/Property.h
@@ -27,6 +27,7 @@ namespace mlir {
namespace tblgen {
class Dialect;
class Type;
+class Pred;
// Wrapper class providing helper methods for accessing MLIR Property defined
// in TableGen. This class should closely reflect what is defined as class
@@ -74,6 +75,10 @@ public:
return convertFromAttributeCall;
}
+ // Return the property's predicate. Properties that didn't come from
+ // tablegen (the hardcoded ones) have the null predicate.
+ Pred getPredicate() const;
+
// Returns the method call which parses this property from textual MLIR.
StringRef getParserCall() const { return parserCall; }
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 065739e..f8a7a22 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -483,10 +483,12 @@ bool AnalysisState::isValueRead(Value value) const {
// Starting from `value`, follow the use-def chain in reverse, always selecting
// the aliasing OpOperands. Find and return Values for which `condition`
// evaluates to true. OpOperands of such matching Values are not traversed any
-// further.
+// further, the visited aliasing opOperands will be preserved through
+// `visitedOpOperands`.
llvm::SetVector<Value> AnalysisState::findValueInReverseUseDefChain(
Value value, llvm::function_ref<bool(Value)> condition,
- TraversalConfig config) const {
+ TraversalConfig config,
+ llvm::DenseSet<OpOperand *> *visitedOpOperands) const {
llvm::DenseSet<Value> visited;
llvm::SetVector<Value> result, workingSet;
workingSet.insert(value);
@@ -553,6 +555,8 @@ llvm::SetVector<Value> AnalysisState::findValueInReverseUseDefChain(
}
workingSet.insert(a.opOperand->get());
+ if (visitedOpOperands)
+ visitedOpOperands->insert(a.opOperand);
}
}
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
index cb2efef..abc0635 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
@@ -48,27 +48,20 @@ neededValuesDominateInsertionPoint(const DominanceInfo &domInfo,
return true;
}
-/// Return true if the given `insertionPoint` dominates all uses of
-/// `emptyTensorOp`.
-static bool insertionPointDominatesUses(const DominanceInfo &domInfo,
- Operation *insertionPoint,
- Operation *emptyTensorOp) {
- return llvm::all_of(emptyTensorOp->getUsers(), [&](Operation *user) {
- return domInfo.dominates(insertionPoint, user);
- });
-}
-
-/// Find a valid insertion point for a replacement of `emptyTensorOp`, assuming
-/// that the replacement may use any value from `neededValues`.
+/// Find a valid insertion point for a replacement of `emptyTensorOp`'s
+/// use of `user` operation, assuming that the replacement may use any
+/// value from `neededValues`.
static Operation *
-findValidInsertionPoint(Operation *emptyTensorOp,
+findValidInsertionPoint(Operation *emptyTensorOp, Operation *user,
const SmallVector<Value> &neededValues) {
DominanceInfo domInfo;
+ Operation *candidateInsertionPoint = emptyTensorOp;
- // Gather all possible insertion points: the location of `emptyTensorOp` and
- // right after the definition of each value in `neededValues`.
+ // Gather all possible insertion points: the location of
+ // `candidateInsertionPoint` and right after the definition of each value in
+ // `neededValues`.
SmallVector<Operation *> insertionPointCandidates;
- insertionPointCandidates.push_back(emptyTensorOp);
+ insertionPointCandidates.push_back(candidateInsertionPoint);
for (Value val : neededValues) {
// Note: The anchor op is using all of `neededValues`, so:
// * in case of a block argument: There must be at least one op in the block
@@ -90,8 +83,8 @@ findValidInsertionPoint(Operation *emptyTensorOp,
if (!neededValuesDominateInsertionPoint(domInfo, insertionPoint,
neededValues))
continue;
- // Check if the insertion point is before all uses.
- if (!insertionPointDominatesUses(domInfo, insertionPoint, emptyTensorOp))
+ // Check if the insertion point is before the use to be replaced.
+ if (!domInfo.dominates(insertionPoint, user))
continue;
return insertionPoint;
}
@@ -103,8 +96,9 @@ findValidInsertionPoint(Operation *emptyTensorOp,
LogicalResult mlir::bufferization::eliminateEmptyTensors(
RewriterBase &rewriter, Operation *op, OneShotAnalysisState &state) {
OpBuilder::InsertionGuard g(rewriter);
-
+ llvm::DenseSet<OpOperand *> visitedOpOperands;
op->walk([&](SubsetInsertionOpInterface op) {
+ visitedOpOperands.clear();
OpOperand &source = op.getSourceOperand();
// Skip operands that do not bufferize inplace. "tensor.empty" could still
// be replaced, but the transformation may not be beneficial.
@@ -131,16 +125,28 @@ LogicalResult mlir::bufferization::eliminateEmptyTensors(
config.followSameTypeOrCastsOnly = true;
SetVector<Value> emptyTensors = state.findValueInReverseUseDefChain(
source.get(), /*condition=*/
- [&](Value val) { return val.getDefiningOp<tensor::EmptyOp>(); },
- config);
+ [&](Value val) { return val.getDefiningOp<tensor::EmptyOp>(); }, config,
+ &visitedOpOperands);
for (Value v : emptyTensors) {
Operation *emptyTensorOp = v.getDefiningOp();
+ // Find the use to be replaced from the use-def chain.
+ auto iter = llvm::find_if(
+ visitedOpOperands, [&emptyTensorOp](OpOperand *opOperand) {
+ return llvm::count(emptyTensorOp->getUses(), *opOperand);
+ });
+ // This could be achieved when a use of `emptyTensorOp` is being
+ // consumed by `SubsetInsertionOpInterface`'s source directly.
+ if (iter == visitedOpOperands.end())
+ continue;
+ OpOperand *useToBeReplaced = *iter;
+ Operation *user = useToBeReplaced->getOwner();
+
// Find a suitable insertion point. If no suitable insertion point for
// the replacement can be found, skip this replacement.
Operation *insertionPoint =
- findValidInsertionPoint(emptyTensorOp, neededValues);
+ findValidInsertionPoint(emptyTensorOp, user, neededValues);
if (!insertionPoint)
continue;
@@ -159,8 +165,10 @@ LogicalResult mlir::bufferization::eliminateEmptyTensors(
replacement = rewriter.create<tensor::CastOp>(v.getLoc(), v.getType(),
replacement);
}
- // Replace the tensor::EmptyOp.
- rewriter.replaceOp(emptyTensorOp, replacement);
+ // Replace the specific use of the tensor::EmptyOp.
+ rewriter.modifyOpInPlace(user, [&]() {
+ user->setOperand(useToBeReplaced->getOperandNumber(), replacement);
+ });
state.resetCache();
}
diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index ad70981..491b5f4 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -327,8 +327,8 @@ SmallVector<int64_t> vector::getAsIntegers(ArrayRef<OpFoldResult> foldResults) {
SmallVector<int64_t> ints;
llvm::transform(
foldResults, std::back_inserter(ints), [](OpFoldResult foldResult) {
- assert(foldResult.is<Attribute>() && "Unexpected non-constant index");
- return cast<IntegerAttr>(foldResult.get<Attribute>()).getInt();
+ assert(isa<Attribute>(foldResult) && "Unexpected non-constant index");
+ return cast<IntegerAttr>(cast<Attribute>(foldResult)).getInt();
});
return ints;
}
@@ -346,7 +346,7 @@ SmallVector<Value> vector::getAsValues(OpBuilder &builder, Location loc,
loc, cast<IntegerAttr>(attr).getInt())
.getResult();
- return foldResult.get<Value>();
+ return cast<Value>(foldResult);
});
return values;
}
@@ -1353,8 +1353,8 @@ LogicalResult vector::ExtractOp::verify() {
return emitOpError(
"expected position attribute of rank no greater than vector rank");
for (auto [idx, pos] : llvm::enumerate(position)) {
- if (pos.is<Attribute>()) {
- int64_t constIdx = cast<IntegerAttr>(pos.get<Attribute>()).getInt();
+ if (auto attr = dyn_cast<Attribute>(pos)) {
+ int64_t constIdx = cast<IntegerAttr>(attr).getInt();
if (constIdx < 0 || constIdx >= getSourceVectorType().getDimSize(idx)) {
return emitOpError("expected position attribute #")
<< (idx + 1)
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
index bd5f06a..b0892d1 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp
@@ -251,7 +251,7 @@ static SmallVector<int64_t> getReducedShape(ArrayRef<OpFoldResult> mixedSizes) {
continue;
}
- auto value = cast<IntegerAttr>(size.get<Attribute>()).getValue();
+ auto value = cast<IntegerAttr>(cast<Attribute>(size)).getValue();
if (value == 1)
continue;
reducedShape.push_back(value.getSExtValue());
@@ -570,8 +570,8 @@ static SmallVector<Value> getCollapsedIndices(RewriterBase &rewriter,
collapsedOffset = affine::makeComposedFoldedAffineApply(
rewriter, loc, collapsedExpr, collapsedVals);
- if (collapsedOffset.is<Value>()) {
- indicesAfterCollapsing.push_back(collapsedOffset.get<Value>());
+ if (auto value = dyn_cast<Value>(collapsedOffset)) {
+ indicesAfterCollapsing.push_back(value);
} else {
indicesAfterCollapsing.push_back(rewriter.create<arith::ConstantIndexOp>(
loc, *getConstantIntValue(collapsedOffset)));
@@ -841,8 +841,8 @@ class RewriteScalarExtractElementOfTransferRead
OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
rewriter, loc, sym0 + sym1,
{newIndices[newIndices.size() - 1], extractOp.getPosition()});
- if (ofr.is<Value>()) {
- newIndices[newIndices.size() - 1] = ofr.get<Value>();
+ if (auto value = dyn_cast<Value>(ofr)) {
+ newIndices[newIndices.size() - 1] = value;
} else {
newIndices[newIndices.size() - 1] =
rewriter.create<arith::ConstantIndexOp>(loc,
@@ -879,14 +879,14 @@ class RewriteScalarExtractOfTransferRead
SmallVector<Value> newIndices(xferOp.getIndices().begin(),
xferOp.getIndices().end());
for (auto [i, pos] : llvm::enumerate(extractOp.getMixedPosition())) {
- assert(pos.is<Attribute>() && "Unexpected non-constant index");
- int64_t offset = cast<IntegerAttr>(pos.get<Attribute>()).getInt();
+ assert(isa<Attribute>(pos) && "Unexpected non-constant index");
+ int64_t offset = cast<IntegerAttr>(cast<Attribute>(pos)).getInt();
int64_t idx = newIndices.size() - extractOp.getNumIndices() + i;
OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
rewriter, extractOp.getLoc(),
rewriter.getAffineSymbolExpr(0) + offset, {newIndices[idx]});
- if (ofr.is<Value>()) {
- newIndices[idx] = ofr.get<Value>();
+ if (auto value = dyn_cast<Value>(ofr)) {
+ newIndices[idx] = value;
} else {
newIndices[idx] = rewriter.create<arith::ConstantIndexOp>(
extractOp.getLoc(), *getConstantIntValue(ofr));
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index 20cd9cb..21ec718e 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -598,9 +598,9 @@ struct BubbleDownVectorBitCastForExtract
// Get the first element of the mixed position as integer.
auto mixedPos = extractOp.getMixedPosition();
- if (mixedPos.size() > 0 && !mixedPos[0].is<Attribute>())
+ if (mixedPos.size() > 0 && !isa<Attribute>(mixedPos[0]))
return failure();
- uint64_t index = cast<IntegerAttr>(mixedPos[0].get<Attribute>()).getInt();
+ uint64_t index = cast<IntegerAttr>(cast<Attribute>(mixedPos[0])).getInt();
// Get the single scalar (as a vector) in the source value that packs the
// desired scalar. E.g. extract vector<1xf32> from vector<4xf32>
diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp
index 2291d64..59df0cd 100644
--- a/mlir/lib/IR/AffineExpr.cpp
+++ b/mlir/lib/IR/AffineExpr.cpp
@@ -1385,7 +1385,7 @@ LogicalResult SimpleAffineExprFlattener::visitModExpr(AffineBinaryOpExpr expr) {
lhs[getLocalVarStartIndex() + numLocals - 1] = -rhsConst;
} else {
// Reuse the existing local id.
- lhs[getLocalVarStartIndex() + loc] = -rhsConst;
+ lhs[getLocalVarStartIndex() + loc] -= rhsConst;
}
return success();
}
diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp
index f71dd0b..e5cfc07 100644
--- a/mlir/lib/TableGen/Predicate.cpp
+++ b/mlir/lib/TableGen/Predicate.cpp
@@ -235,6 +235,16 @@ propagateGroundTruth(PredNode *node,
return node;
}
+ if (node->kind == PredCombinerKind::And && node->children.empty()) {
+ node->kind = PredCombinerKind::True;
+ return node;
+ }
+
+ if (node->kind == PredCombinerKind::Or && node->children.empty()) {
+ node->kind = PredCombinerKind::False;
+ return node;
+ }
+
// Otherwise, look at child nodes.
// Move child nodes into some local variable so that they can be optimized
diff --git a/mlir/lib/TableGen/Property.cpp b/mlir/lib/TableGen/Property.cpp
index b86b87df..1385116 100644
--- a/mlir/lib/TableGen/Property.cpp
+++ b/mlir/lib/TableGen/Property.cpp
@@ -14,6 +14,7 @@
#include "mlir/TableGen/Property.h"
#include "mlir/TableGen/Format.h"
#include "mlir/TableGen/Operator.h"
+#include "mlir/TableGen/Predicate.h"
#include "llvm/TableGen/Record.h"
using namespace mlir;
@@ -68,8 +69,8 @@ Property::Property(StringRef summary, StringRef description,
StringRef writeToMlirBytecodeCall,
StringRef hashPropertyCall, StringRef defaultValue,
StringRef storageTypeValueOverride)
- : summary(summary), description(description), storageType(storageType),
- interfaceType(interfaceType),
+ : def(nullptr), summary(summary), description(description),
+ storageType(storageType), interfaceType(interfaceType),
convertFromStorageCall(convertFromStorageCall),
assignToStorageCall(assignToStorageCall),
convertToAttributeCall(convertToAttributeCall),
@@ -91,6 +92,15 @@ StringRef Property::getPropertyDefName() const {
return def->getName();
}
+Pred Property::getPredicate() const {
+ if (!def)
+ return Pred();
+ const llvm::RecordVal *maybePred = def->getValue("predicate");
+ if (!maybePred || !maybePred->getValue())
+ return Pred();
+ return Pred(maybePred->getValue());
+}
+
Property Property::getBaseProperty() const {
if (const auto *defInit =
llvm::dyn_cast<llvm::DefInit>(def->getValueInit("baseProperty"))) {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
index 2ba8246..9150986 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir
@@ -55,6 +55,7 @@ func.func @buffer_forwarding_conflict_with_different_element_type(%arg0: tensor<
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
%cst = arith.constant 0.000000e+00 : f32
+ // CHECK: bufferization.alloc_tensor(%arg1)
%0 = tensor.empty(%arg1) : tensor<?xf32>
// CHECK: bufferization.alloc_tensor(%arg1)
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
index efe59af..2643477 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -365,3 +365,103 @@ func.func @multiple_materialize_in_destination_buffer(%m: memref<5xf32>, %f: f32
bufferization.materialize_in_destination %selected in restrict writable %m : (tensor<5xf32>, memref<5xf32>) -> ()
return
}
+
+// -----
+
+// `EmptyTensorElimination` fails to find a valid insertion
+// point for the new injected `SubsetExtraction`.
+// CHECK-LABEL: func.func @fail_to_eliminate_any_empty_tensors
+func.func @fail_to_eliminate_any_empty_tensors() -> tensor<5x6x128xf32> {
+ %cst_1 = arith.constant 1.0 : f32
+ %cst_2 = arith.constant 2.0 : f32
+ // CHECK: memref.alloc
+ // CHECK: memref.alloc
+ // CHECK: memref.alloc
+ %empty_1 = tensor.empty() : tensor<5x6x64xf32>
+ %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ %empty_2 = tensor.empty() : tensor<5x6x64xf32>
+ %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32>
+ // CHECK: memref.copy
+ %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ return %inserted_slice_2 : tensor<5x6x128xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func.func @succeed_to_eliminate_one_empty_tensor
+func.func @succeed_to_eliminate_one_empty_tensor() -> tensor<5x6x128xf32> {
+ %cst_1 = arith.constant 1.0 : f32
+ %cst_2 = arith.constant 2.0 : f32
+ // CHECK: memref.alloc() {alignment = 64 : i64} : memref<5x6x128xf32>
+ // CHECK: memref.alloc
+ // CHECK-NOT: memref.alloc
+ %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32>
+ %empty_1 = tensor.empty() : tensor<5x6x64xf32>
+ %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ %empty_2 = tensor.empty() : tensor<5x6x64xf32>
+ %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_2 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ // CHECK: memref.copy
+ %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ return %inserted_slice_2 : tensor<5x6x128xf32>
+}
+
+// -----
+
+// `EmptyTensorElimination` will replace the specific use of the tensor
+// empty with the new injected `SubsetExtraction`, i.e. the specific use
+// which has been tracked.
+
+// CHECK-ELIM-LABEL: func.func @mutli_use_of_the_same_tensor_empty
+// CHECK-LABEL: func.func @mutli_use_of_the_same_tensor_empty
+func.func @mutli_use_of_the_same_tensor_empty() -> tensor<5x6x128xf32> {
+ %cst_1 = arith.constant 1.0 : f32
+ %cst_2 = arith.constant 2.0 : f32
+ %cancatenated_empty = tensor.empty() : tensor<5x6x128xf32>
+ %empty_1 = tensor.empty() : tensor<5x6x64xf32>
+ // CHECK-ELIM: %[[VAL_3:.*]] = tensor.extract_slice
+ // CHECK-ELIM: linalg.fill ins(%[[VAL_0:.*]] : f32) outs(%[[VAL_3]]
+ // CHECK-ELIM-NOT: linalg.fill ins(%[[VAL_1:.*]] : f32) outs(%[[VAL_3]]
+ %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ %res_2 = linalg.fill ins(%cst_2 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ // CHECK: memref.copy
+ %inserted_slice_1 = tensor.insert_slice %res_1 into %cancatenated_empty[0, 0, 0][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ // CHECK-NOT: memref.copy
+ %inserted_slice_2 = tensor.insert_slice %res_2 into %inserted_slice_1[0, 0, 64][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ return %inserted_slice_2 : tensor<5x6x128xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read
+// CHECK-ELIM-LABEL: func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read
+func.func @mutli_use_of_the_same_tensor_empty_creates_non_existent_read(%arg1: tensor<5x6x128xf32> , %arg2: tensor<5x6x64xf32>)
+ -> (tensor<5x6x128xf32>, tensor<5x6x64xf32>) {
+ %cst_1 = arith.constant 1.0 : f32
+ %empty_1 = tensor.empty() : tensor<5x6x64xf32>
+ // CHECK: memref.alloc() {alignment = 64 : i64} : memref<5x6x64xf32>
+ // CHECK-NOT: memref.alloc
+ %res_1 = linalg.fill ins(%cst_1 : f32) outs(%empty_1 : tensor<5x6x64xf32>) -> tensor<5x6x64xf32>
+ %res_2 = linalg.generic{
+ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+ iterator_types = ["parallel", "parallel", "parallel"]
+ }
+ ins(%empty_1 : tensor<5x6x64xf32>)
+ outs(%arg2 :tensor<5x6x64xf32>) {
+ ^bb0(%in: f32, %out: f32):
+ %res = arith.addf %in, %in : f32
+ linalg.yield %res : f32
+ } -> tensor<5x6x64xf32>
+ // CHECK-NOT: memref.copy
+ %inserted_slice_1 = tensor.insert_slice %res_1 into %arg1[0, 0, 0][5, 6, 64][1, 1, 1]
+ : tensor<5x6x64xf32> into tensor<5x6x128xf32>
+ return %inserted_slice_1, %res_2 : tensor<5x6x128xf32>, tensor<5x6x64xf32>
+}
diff --git a/mlir/test/IR/test-op-property-predicates.mlir b/mlir/test/IR/test-op-property-predicates.mlir
new file mode 100644
index 0000000..3a4e297
--- /dev/null
+++ b/mlir/test/IR/test-op-property-predicates.mlir
@@ -0,0 +1,148 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'scalar' failed to satisfy constraint: non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = -1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'optional' failed to satisfy constraint: optional non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [-1 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'defaulted' failed to satisfy constraint: non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = -1 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'more_constrained' failed to satisfy constraint: between 0 and 5}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 100 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'array' failed to satisfy constraint: array of non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [-1 : i64],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_unconstrained' failed to satisfy constraint: non-empty array of int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [-1 : i64],
+ non_empty_optional = [[7 : i64]],
+ unconstrained = 8 : i64}>
+
+// -----
+
+// expected-error @+1 {{'test.op_with_property_predicates' op property 'non_empty_optional' failed to satisfy constraint: optional non-empty array of non-negative int64_t}}
+test.op_with_property_predicates <{
+ scalar = 1 : i64,
+ optional = [2 : i64],
+ defaulted = 3 : i64,
+ more_constrained = 4 : i64,
+ array = [],
+ non_empty_unconstrained = [5: i64],
+ non_empty_constrained = [6 : i64],
+ non_empty_optional = [[]],
+ unconstrained = 8 : i64}>
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index d24d52f..384139d 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2998,7 +2998,7 @@ def TestOpWithProperties : TEST_Op<"with_properties"> {
StrAttr:$b, // Attributes can directly be used here.
StringProperty:$c,
BoolProperty:$flag,
- IntArrayProperty<"int64_t">:$array // example of an array
+ IntArrayProperty<I64Property>:$array // example of an array
);
}
@@ -3040,15 +3040,15 @@ def TestOpWithEmptyProperties : TEST_Op<"empty_properties"> {
def TestOpUsingPropertyInCustom : TEST_Op<"using_property_in_custom"> {
let assemblyFormat = "custom<UsingPropertyInCustom>($prop) attr-dict";
- let arguments = (ins IntArrayProperty<"int64_t">:$prop);
+ let arguments = (ins IntArrayProperty<I64Property>:$prop);
}
def TestOpUsingPropertyInCustomAndOther
: TEST_Op<"using_property_in_custom_and_other"> {
let assemblyFormat = "custom<UsingPropertyInCustom>($prop) prop-dict attr-dict";
let arguments = (ins
- IntArrayProperty<"int64_t">:$prop,
- IntProperty<"int64_t">:$other
+ IntArrayProperty<I64Property>:$prop,
+ I64Property:$other
);
}
@@ -3253,6 +3253,30 @@ def TestOpWithArrayProperties : TEST_Op<"with_array_properties"> {
);
}
+def NonNegativeI64Property : ConfinedProperty<I64Property,
+ CPred<"$_self >= 0">, "non-negative int64_t">;
+
+class NonEmptyArray<Property p> : ConfinedProperty
+ <ArrayProperty<p>, Neg<CPred<"$_self.empty()">>,
+ "non-empty array of " # p.summary>;
+
+def OpWithPropertyPredicates : TEST_Op<"op_with_property_predicates"> {
+ let arguments = (ins
+ NonNegativeI64Property:$scalar,
+ OptionalProperty<NonNegativeI64Property>:$optional,
+ DefaultValuedProperty<NonNegativeI64Property, "0">:$defaulted,
+ ConfinedProperty<NonNegativeI64Property,
+ CPred<"$_self <= 5">, "between 0 and 5">:$more_constrained,
+ ArrayProperty<NonNegativeI64Property>:$array,
+ NonEmptyArray<I64Property>:$non_empty_unconstrained,
+ NonEmptyArray<NonNegativeI64Property>:$non_empty_constrained,
+ // Test applying predicates when the fromStorage() on the optional<> isn't trivial.
+ OptionalProperty<NonEmptyArray<NonNegativeI64Property>>:$non_empty_optional,
+ I64Property:$unconstrained
+ );
+ let assemblyFormat = "attr-dict prop-dict";
+}
+
//===----------------------------------------------------------------------===//
// Test Dataflow
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/mlir-tblgen/op-properties-predicates.td b/mlir/test/mlir-tblgen/op-properties-predicates.td
new file mode 100644
index 0000000..fa06e14
--- /dev/null
+++ b/mlir/test/mlir-tblgen/op-properties-predicates.td
@@ -0,0 +1,73 @@
+// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s
+
+include "mlir/IR/Constraints.td"
+include "mlir/IR/EnumAttr.td"
+include "mlir/IR/OpBase.td"
+include "mlir/IR/Properties.td"
+
+def Test_Dialect : Dialect {
+ let name = "test";
+ let cppNamespace = "foobar";
+}
+class NS_Op<string mnemonic, list<Trait> traits = []> :
+ Op<Test_Dialect, mnemonic, traits>;
+
+def NonNegativeI64Property : ConfinedProperty<I64Property,
+ CPred<"$_self >= 0">, "non-negative int64_t">;
+
+class NonEmptyArray<Property p> : ConfinedProperty
+ <ArrayProperty<p>, Neg<CPred<"$_self.empty()">>,
+ "non-empty array of " # p.summary>;
+
+def OpWithPredicates : NS_Op<"op_with_predicates"> {
+ let arguments = (ins
+ NonNegativeI64Property:$scalar,
+ OptionalProperty<NonNegativeI64Property>:$optional,
+ DefaultValuedProperty<NonNegativeI64Property, "0">:$defaulted,
+ ConfinedProperty<NonNegativeI64Property,
+ CPred<"$_self <= 5">, "between 0 and 5">:$moreConstrained,
+ ArrayProperty<NonNegativeI64Property>:$array,
+ NonEmptyArray<I64Property>:$non_empty_unconstrained,
+ NonEmptyArray<NonNegativeI64Property>:$non_empty_constrained,
+ // Test applying predicates when the fromStorage() on the optional<> isn't trivial.
+ OptionalProperty<NonEmptyArray<NonNegativeI64Property>>:$non_empty_optional,
+ I64Property:$unconstrained
+ );
+}
+
+// CHECK-LABEL: OpWithPredicates::verifyInvariantsImpl()
+// Note: for test readibility, we capture [[maybe_unused]] into the variable maybe_unused
+// CHECK: [[maybe_unused:\[\[maybe_unused\]\]]] int64_t tblgen_scalar = this->getScalar();
+// CHECK: if (!((tblgen_scalar >= 0)))
+// CHECK-NEXT: return emitOpError("property 'scalar' failed to satisfy constraint: non-negative int64_t");
+
+// CHECK: [[maybe_unused]] std::optional<int64_t> tblgen_optional = this->getOptional();
+// CHECK: if (!(((!tblgen_optional.has_value())) || (((*(tblgen_optional)) >= 0))))
+// CHECK-NEXT: return emitOpError("property 'optional' failed to satisfy constraint: optional non-negative int64_t");
+
+// CHECK: [[maybe_unused]] int64_t tblgen_defaulted = this->getDefaulted();
+// CHECK: if (!((tblgen_defaulted >= 0)))
+// CHECK-NEXT: return emitOpError("property 'defaulted' failed to satisfy constraint: non-negative int64_t");
+
+// CHECK: [[maybe_unused]] int64_t tblgen_moreConstrained = this->getMoreConstrained();
+// CHECK: if (!(((tblgen_moreConstrained >= 0)) && ((tblgen_moreConstrained <= 5))))
+// CHECK-NEXT: return emitOpError("property 'moreConstrained' failed to satisfy constraint: between 0 and 5");
+
+// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_array = this->getArray();
+// CHECK: if (!(::llvm::all_of(tblgen_array, [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); })))
+// CHECK-NEXT: return emitOpError("property 'array' failed to satisfy constraint: array of non-negative int64_t");
+
+// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_nonEmptyUnconstrained = this->getNonEmptyUnconstrained();
+// CHECK: if (!(!((tblgen_nonEmptyUnconstrained.empty()))))
+// CHECK-NEXT: return emitOpError("property 'non_empty_unconstrained' failed to satisfy constraint: non-empty array of int64_t");
+
+// CHECK: [[maybe_unused]] ::llvm::ArrayRef<int64_t> tblgen_nonEmptyConstrained = this->getNonEmptyConstrained();
+// CHECK: if (!((::llvm::all_of(tblgen_nonEmptyConstrained, [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); })) && (!((tblgen_nonEmptyConstrained.empty())))))
+// CHECK-NEXT: return emitOpError("property 'non_empty_constrained' failed to satisfy constraint: non-empty array of non-negative int64_t");
+
+// CHECK: [[maybe_unused]] std::optional<::llvm::ArrayRef<int64_t>> tblgen_nonEmptyOptional = this->getNonEmptyOptional();
+// CHECK: (!(((!tblgen_nonEmptyOptional.has_value())) || ((::llvm::all_of((*(tblgen_nonEmptyOptional)), [](const int64_t& baseStore) -> bool { return [](int64_t baseIface) -> bool { return ((baseIface >= 0)); }(baseStore); })) && (!(((*(tblgen_nonEmptyOptional)).empty()))))))
+// CHECK-NEXT: return emitOpError("property 'non_empty_optional' failed to satisfy constraint: optional non-empty array of non-negative int64_t");
+
+// CHECK-NOT: int64_t tblgen_unconstrained
+// CHECK: return ::mlir::success();
diff --git a/mlir/test/mlir-tblgen/op-properties.td b/mlir/test/mlir-tblgen/op-properties.td
index 918583c..bf32ce4 100644
--- a/mlir/test/mlir-tblgen/op-properties.td
+++ b/mlir/test/mlir-tblgen/op-properties.td
@@ -20,7 +20,7 @@ def OpWithAttr : NS_Op<"op_with_attr">{
// Test required and optional properties
// ---
-def DefaultI64Array : IntArrayProperty<"int64_t"> {
+def DefaultI64Array : IntArrayProperty<I64Property> {
let defaultValue = "::llvm::ArrayRef<int64_t>{}";
let storageTypeValueOverride = "::llvm::SmallVector<int64_t>{}";
}
diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
index 30a2d4d..a970cbc 100644
--- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp
@@ -1033,6 +1033,61 @@ while (true) {{
emitVerifier(namedAttr.attr, namedAttr.name, getVarName(namedAttr.name));
}
+static void genPropertyVerifier(const OpOrAdaptorHelper &emitHelper,
+ FmtContext &ctx, MethodBody &body) {
+
+ // Code to get a reference to a property into a variable to avoid multiple
+ // evaluations while verifying a property.
+ // {0}: Property variable name.
+ // {1}: Property name, with the first letter capitalized, to find the getter.
+ // {2}: Property interface type.
+ const char *const fetchProperty = R"(
+ [[maybe_unused]] {2} {0} = this->get{1}();
+)";
+
+ // Code to verify that the predicate of a property holds. Embeds the
+ // condition inline.
+ // {0}: Property condition code, with tgfmt() applied.
+ // {1}: Emit error prefix.
+ // {2}: Property name.
+ // {3}: Property description.
+ const char *const verifyProperty = R"(
+ if (!({0}))
+ return {1}"property '{2}' failed to satisfy constraint: {3}");
+)";
+
+ // Prefix variables with `tblgen_` to avoid hiding the attribute accessor.
+ const auto getVarName = [&](const NamedProperty &prop) {
+ std::string varName =
+ convertToCamelFromSnakeCase(prop.name, /*capitalizeFirst=*/false);
+ return (tblgenNamePrefix + Twine(varName)).str();
+ };
+
+ for (const NamedProperty &prop : emitHelper.getOp().getProperties()) {
+ Pred predicate = prop.prop.getPredicate();
+ // Null predicate, nothing to verify.
+ if (predicate == Pred())
+ continue;
+
+ std::string rawCondition = predicate.getCondition();
+ if (rawCondition == "true")
+ continue;
+ bool needsOp = StringRef(rawCondition).contains("$_op");
+ if (needsOp && !emitHelper.isEmittingForOp())
+ continue;
+
+ auto scope = body.scope("{\n", "}\n", /*indent=*/true);
+ std::string varName = getVarName(prop);
+ std::string getterName =
+ convertToCamelFromSnakeCase(prop.name, /*capitalizeFirst=*/true);
+ body << formatv(fetchProperty, varName, getterName,
+ prop.prop.getInterfaceType());
+ body << formatv(verifyProperty, tgfmt(rawCondition, &ctx.withSelf(varName)),
+ emitHelper.emitErrorPrefix(), prop.name,
+ prop.prop.getSummary());
+ }
+}
+
/// Include declarations specified on NativeTrait
static std::string formatExtraDeclarations(const Operator &op) {
SmallVector<StringRef> extraDeclarations;
@@ -3726,6 +3781,7 @@ void OpEmitter::genVerifier() {
bool useProperties = emitHelper.hasProperties();
populateSubstitutions(emitHelper, verifyCtx);
+ genPropertyVerifier(emitHelper, verifyCtx, implBody);
genAttributeVerifier(emitHelper, verifyCtx, implBody, staticVerifierEmitter,
useProperties);
genOperandResultVerifier(implBody, op.getOperands(), "operand");
diff --git a/mlir/unittests/IR/AffineExprTest.cpp b/mlir/unittests/IR/AffineExprTest.cpp
index 9e89a5b..8a2d697 100644
--- a/mlir/unittests/IR/AffineExprTest.cpp
+++ b/mlir/unittests/IR/AffineExprTest.cpp
@@ -129,3 +129,21 @@ TEST(AffineExprTest, d0PlusD0FloorDivNeg2) {
auto sum = d0 + d0.floorDiv(-2) * 2;
ASSERT_EQ(toString(sum), "d0 + (d0 floordiv -2) * 2");
}
+
+TEST(AffineExprTest, simpleAffineExprFlattenerRegression) {
+
+ // Regression test for a bug where mod simplification was not handled
+ // properly when `lhs % rhs` was happened to have the property that `lhs
+ // floordiv rhs = lhs`.
+ MLIRContext ctx;
+ OpBuilder b(&ctx);
+
+ auto d0 = b.getAffineDimExpr(0);
+
+ // Manually replace variables by constants to avoid constant folding.
+ AffineExpr expr = (d0 - (d0 + 2)).floorDiv(8) % 8;
+ AffineExpr result = mlir::simplifyAffineExpr(expr, 1, 0);
+
+ ASSERT_TRUE(isa<AffineConstantExpr>(result));
+ ASSERT_EQ(cast<AffineConstantExpr>(result).getValue(), 7);
+}