diff options
author | Joe Nash <Sisyph@users.noreply.github.com> | 2024-04-03 11:34:12 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-03 11:34:12 -0400 |
commit | 6a13bbf92f6f7f2f5d59dfda99ccca223c72eeef (patch) | |
tree | 16c186cbfb079f9370317574c855962c38b43569 | |
parent | 1aedf949e0f6d5e0a6b15e28780be126730db023 (diff) | |
download | llvm-6a13bbf92f6f7f2f5d59dfda99ccca223c72eeef.zip llvm-6a13bbf92f6f7f2f5d59dfda99ccca223c72eeef.tar.gz llvm-6a13bbf92f6f7f2f5d59dfda99ccca223c72eeef.tar.bz2 |
[AMDGPU][MC] Enables sgpr or imm src1 for float VOP3 DPP, but excluding VOPC. (#87382)
Fixes support on GFX1150 and GFX12 where src1 of e64_dpp instructions
should allow sgpr and imm operands.
PR #67461 added support for this with int operands, but it was missing a
piece for float.
Changing VOPC e64_dpp will be in a different patch because there is a
bug preventing that change.
19 files changed, 2055 insertions, 19 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 294fc68..3866723 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4627,10 +4627,15 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, if (Src1Idx >= 0) { const MCOperand &Src1 = Inst.getOperand(Src1Idx); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (Src1.isImm() || - (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); - Error(Op.getStartLoc(), "invalid operand for instruction"); + if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) { + auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()); + SMLoc S = getRegLoc(Reg, Operands); + Error(S, "invalid operand for instruction"); + return false; + } + if (Src1.isImm()) { + Error(getInstLoc(Operands), + "src1 immediate operand invalid for instruction"); return false; } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1694436..f1afbcc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2268,7 +2268,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field Operand Src1ModDPP = getSrcModDPP<Src1VT>.ret; field Operand Src2ModDPP = getSrcModDPP<Src2VT>.ret; field Operand Src0ModVOP3DPP = getSrcModDPP<Src0VT>.ret; - field Operand Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; + field Operand Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT>.ret; field Operand Src2ModVOP3DPP = getSrcModVOP3DPP<Src2VT>.ret; field Operand Src0ModSDWA = getSrcModSDWA<Src0VT>.ret; field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index f136a43..c001c5d 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ 
b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -503,6 +503,7 @@ def VOP_MAC_F16_t16 : VOP_MAC <f16> { dpp8:$dpp8, Dpp8FI:$fi); let Src2Mod = FP32InputMods; // dummy unused modifiers let Src2RC64 = VGPRSrc_32; // stub argument + let Src1ModVOP3DPP = getSrcModVOP3DPP<Src1VT, 1/*IsFake16*/>.ret; } def VOP_MAC_F32 : VOP_MAC <f32>; let HasExtDPP = 0, HasExt32BitDPP = 0 in @@ -618,7 +619,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> { let AsmVOP3Base = "$vdst, $src0_modifiers, $src1_modifiers, $src2"; let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC64:$vdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. @@ -652,7 +653,7 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> { dpp8:$dpp8, Dpp8FI:$fi); let Src0ModVOP3DPP = FPVRegInputMods; - let Src1ModVOP3DPP = FPVRegInputMods; + let Src1ModVOP3DPP = FP32VCSrcInputMods; let HasExt = 1; let HasExtDPP = 1; @@ -662,7 +663,17 @@ class VOP2e_SGPR<list<ValueType> ArgVT> : VOPProfile<ArgVT> { } def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; -def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; +def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { + let IsTrue16 = 1; + let DstRC64 = getVALUDstForVT<DstVT>.ret; + + let Src0Mod = getSrcMod<f16>.ret; + let Src1Mod = getSrcMod<f16>.ret; + + let Src0VOP3DPP = VGPRSrc_32; + let Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP<f16, 1/*IsFake16*/>.ret; +} def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { let Outs32 = (outs SReg_32:$vdst); @@ -703,7 +714,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { //===----------------------------------------------------------------------===// let SubtargetPredicate = isGFX11Plus in -defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>; +defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>; defm 
V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 022fb7c..16dd353 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -75,6 +75,8 @@ class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt let HasDst32 = 0; // VOPC disallows dst_sel and dst_unused as they have no effect on destination let EmitDstSel = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; @@ -112,6 +114,8 @@ class VOPC_NoSdst_Profile<list<SchedReadWrite> sched, ValueType vt0, "$src0, $src1"); let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; } multiclass VOPC_NoSdst_Profile_t16<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> { @@ -785,6 +789,8 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; } multiclass VOPC_Class_Profile_t16<list<SchedReadWrite> sched> { @@ -812,6 +818,8 @@ class VOPC_Class_NoSdst_Profile<list<SchedReadWrite> sched, ValueType src0VT, Va let AsmVOP3Base = "$src0_modifiers, $src1"; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP<Src1VT>.ret; } multiclass VOPC_Class_NoSdst_Profile_t16<list<SchedReadWrite> sched> { diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s 
b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s index 056221f..336dd8b 100644 --- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s @@ -23,3 +23,13 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] // GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, s2 row_mirror +// GFX1150: encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] + +v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf +// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] + +// This is a regression test for potential changes in the future. +v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s index da1989e..3ec3162 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s @@ -51,13 +51,13 @@ v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_cvt_f32_i32_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction @@ -135,7 +135,7 @@ 
v_fmac_f16_e64_dpp v5, s2, v3 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fmac_f16_e64_dpp v5, v2, 1.0 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_fmac_f32_e64_dpp v5, s2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction @@ -144,7 +144,7 @@ v_fmac_f32_e64_dpp v5, 0x1234, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fmac_f32_e64_dpp v5, v2, 1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_fmac_f32_e64_dpp v5, -1.0, v3 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s index bb911c6..f32b7da 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s @@ -23,6 +23,10 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] // GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// This is a regression test for potential changes in the future. 
+v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] + // // Elements of CPol operand can be given in any order // diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 88bdb7e..d0e309a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -6,6 +6,12 @@ v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -57,6 +63,10 @@ v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -113,6 +123,10 @@ v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, 
s[12:13], v1, v2 row_shl:1 // W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -155,6 +169,12 @@ v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -323,6 +343,12 @@ v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -365,6 +391,12 @@ v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // 
GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -449,6 +481,12 @@ v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -575,6 +613,12 @@ v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -617,6 +661,12 @@ v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -659,6 +709,12 @@ v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -752,6 +808,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -808,6 +872,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror // W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 // W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -850,6 +922,12 @@ v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 ban v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -892,6 +970,12 @@ v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -934,6 +1018,12 @@ v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // 
GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -976,6 +1066,12 @@ v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1378,6 +1474,12 @@ v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1588,6 +1690,12 @@ v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 b v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1630,6 +1738,12 @@ 
v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 ro v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1672,6 +1786,12 @@ v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1756,6 +1876,9 @@ v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 ba v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1798,6 +1921,12 @@ v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1840,6 +1969,12 @@ v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1966,6 +2101,12 @@ v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 b v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2008,6 +2149,12 @@ v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i32_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// 
GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2050,6 +2197,12 @@ v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2092,6 +2245,12 @@ v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2134,6 +2293,12 @@ v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u32_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // 
GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2176,6 +2341,12 @@ v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2218,6 +2389,12 @@ v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2260,6 +2437,12 @@ v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2302,6 +2485,12 @@ 
v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2344,6 +2533,12 @@ v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2386,6 +2581,12 @@ v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2428,6 +2629,12 @@ v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2554,6 +2761,12 @@ v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2596,6 +2809,12 @@ v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2638,6 +2857,12 @@ v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_i32_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2680,6 +2905,12 @@ v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2806,6 +3037,12 @@ v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2848,6 +3085,12 @@ v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2890,6 +3133,12 @@ v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2932,6 +3181,12 @@ v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2974,6 +3229,12 @@ v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + 
v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3016,6 +3277,12 @@ v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3058,6 +3325,12 @@ v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3100,6 +3373,12 @@ v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: 
[0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3142,6 +3421,12 @@ v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3184,6 +3469,12 @@ v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3226,6 +3517,12 @@ v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3268,6 +3565,12 @@ v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3394,6 +3697,12 @@ v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3436,6 +3745,12 @@ v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3478,6 +3793,12 @@ v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: 
[0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3520,6 +3841,12 @@ v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3562,6 +3889,9 @@ v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3646,6 +3976,12 @@ v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bo v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3688,6 +4024,12 @@ v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3814,6 +4156,12 @@ v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mas v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3856,6 +4204,9 @@ v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3898,6 +4249,12 @@ v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 
row_mask:0x3 ba v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3940,6 +4297,12 @@ v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3982,6 +4345,9 @@ v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4033,6 +4399,10 @@ v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4089,6 +4459,10 @@ v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 // W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4266,6 +4640,10 @@ v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4322,6 +4700,10 @@ v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 // W64: 
[0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4364,6 +4746,12 @@ v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 b v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4406,6 +4794,12 @@ v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4770,7 +5164,7 @@ v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX12: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] @@ -4791,7 +5185,7 @@ 
v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_ma // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX12: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] @@ -4973,6 +5367,12 @@ v_maximum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bou v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5015,6 +5415,12 @@ v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_m v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5057,6 +5463,12 @@ v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_m v_minimum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5099,6 +5511,12 @@ v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x v_maximum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5180,6 +5598,12 @@ v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5222,6 +5646,12 @@ v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximumminimum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: 
[0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5264,6 +5694,12 @@ v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_m v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 0e84765..25b13ac 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -6,6 +6,12 @@ v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_add3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -47,6 +53,10 @@ v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s105, v1, s2 
dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -67,6 +77,10 @@ v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -81,6 +95,12 @@ v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -144,6 +164,12 @@ v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbit_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x16,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -177,6 +203,12 @@ v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -219,6 +251,12 @@ v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_and_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -273,6 +311,12 @@ v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfe_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, 10, v3 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -309,6 +353,12 @@ v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfe_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -345,6 +395,12 @@ v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfi_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -391,6 +447,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
[0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -423,12 +487,22 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 // W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubeid_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -465,6 +539,12 @@ v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubema_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x0f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -501,6 +581,12 @@ v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubesc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -537,6 +623,12 @@ v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubetc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -687,6 +779,12 @@ v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cvt_pk_u8_f32_e64_dpp 
v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -771,6 +869,12 @@ v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -807,6 +911,12 @@ v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0 v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -843,6 +953,12 @@ v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0 v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -891,6 +1007,9 @@ v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lerp_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -927,6 +1046,12 @@ v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -963,6 +1088,12 @@ v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1017,6 +1148,12 @@ v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1050,6 +1187,12 @@ v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i32_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1086,6 +1229,12 @@ v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1122,6 +1271,12 @@ v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5, v1, s2, 
v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1155,6 +1310,12 @@ v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u32_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1191,6 +1352,12 @@ v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1227,6 +1394,12 @@ v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1263,6 +1436,12 @@ v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1299,6 +1478,12 @@ v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1332,6 +1517,12 @@ v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, 
10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1368,6 +1559,12 @@ v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1401,6 +1598,12 @@ v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1455,6 +1658,12 @@ v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + 
v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1491,6 +1700,12 @@ v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1527,6 +1742,12 @@ v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1563,6 +1784,12 @@ v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1617,6 +1844,12 @@ v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1653,6 +1886,12 @@ v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1689,6 +1928,12 @@ v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1722,6 +1967,12 @@ 
v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1758,6 +2009,12 @@ v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1791,6 +2048,12 @@ v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1827,6 +2090,12 @@ v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] 
// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1863,6 +2132,12 @@ v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1899,6 +2174,12 @@ v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1932,6 +2213,12 @@ v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
+v_min3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1968,6 +2255,12 @@ v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2001,6 +2294,12 @@ v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2055,6 +2354,12 @@ v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] 
+ +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2091,6 +2396,12 @@ v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2127,6 +2438,12 @@ v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2163,6 +2480,12 @@ v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2199,6 +2522,9 @@ v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_msad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2244,6 +2570,12 @@ v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mullit_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2280,6 +2612,12 @@ v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_or3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2337,6 +2675,12 @@ v_pack_b32_f16_e64_dpp v255, 
-|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_perm_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2373,6 +2717,9 @@ v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2409,6 +2756,12 @@ v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2445,6 +2798,12 @@ v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x25,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2481,6 +2840,9 @@ v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2518,6 +2880,10 @@ v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2538,6 +2904,10 @@ v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: 
[0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2584,6 +2954,10 @@ v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2608,6 +2982,10 @@ v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2622,6 +3000,12 @@ v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_xad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_xad_u32_e64_dpp v5, v1, v2, v255 
dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2658,6 +3042,12 @@ v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_xor3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2983,7 +3373,7 @@ v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] // GFX12: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] @@ -3004,7 +3394,7 @@ v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] // GFX12: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] @@ -3066,6 +3456,12 @@ v_maximum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minimum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimum3_f32 
v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3102,6 +3498,12 @@ v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0, v_maximum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3138,6 +3540,12 @@ v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0, v_minimum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3174,6 +3582,12 @@ v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] f v_maximum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, 2.0, v3 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3210,6 +3624,12 @@ v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] f v_maximumminimum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximumminimum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximumminimum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3246,6 +3666,12 @@ v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_minimummaximum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimummaximum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimummaximum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3282,6 +3708,12 @@ v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_maximumminimum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximumminimum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x6f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximumminimum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3318,6 +3750,12 @@ v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0, v_minimummaximum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimummaximum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimummaximum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s index ab88ec8..2b7830c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s @@ -128,6 +128,12 @@ v_add_f16_e64_dpp v5, v1, v2 row_shl:1 v_add_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -170,6 +176,12 @@ v_add_f32_e64_dpp v5, v1, v2 row_shl:1 v_add_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: 
[0x05,0x00,0x03,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -212,6 +224,12 @@ v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -254,6 +272,12 @@ v_and_b32_e64_dpp v5, v1, v2 row_shl:1 v_and_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_and_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -296,6 +320,12 @@ v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_ashrrev_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -445,6 +475,12 @@ v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s2 
row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -487,6 +523,12 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -529,6 +571,12 @@ v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_ldexp_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -571,6 +619,12 @@ v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_lshlrev_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -613,6 +667,12 @@ 
v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_lshrrev_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -655,6 +715,12 @@ v_max_num_f16_e64_dpp v5, v1, v2 row_shl:1 v_max_num_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_num_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -697,6 +763,12 @@ v_max_num_f32_e64_dpp v5, v1, v2 row_shl:1 v_max_num_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_num_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x16,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_num_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -739,6 +811,12 @@ v_max_i32_e64_dpp v5, v1, v2 row_shl:1 v_max_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + 
v_max_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -781,6 +859,12 @@ v_max_u32_e64_dpp v5, v1, v2 row_shl:1 v_max_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -823,6 +907,12 @@ v_min_num_f16_e64_dpp v5, v1, v2 row_shl:1 v_min_num_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_num_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -865,6 +955,12 @@ v_min_num_f32_e64_dpp v5, v1, v2 row_shl:1 v_min_num_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_num_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x15,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_num_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -907,6 +1003,12 @@ v_min_i32_e64_dpp v5, v1, v2 row_shl:1 v_min_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+v_min_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -949,6 +1051,12 @@ v_min_u32_e64_dpp v5, v1, v2 row_shl:1 v_min_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -991,6 +1099,12 @@ v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_dx9_zero_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1033,6 +1147,12 @@ v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x35,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1075,6 +1195,12 @@ v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 
+v_mul_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x08,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1117,6 +1243,12 @@ v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_hi_i32_i24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_i32_i24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1159,6 +1291,12 @@ v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_hi_u32_u24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1201,6 +1339,12 @@ v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_i32_i24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1243,6 
+1387,12 @@ v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_legacy_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1285,6 +1435,12 @@ v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_u32_u24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1327,6 +1483,12 @@ v_or_b32_e64_dpp v5, v1, v2 row_shl:1 v_or_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_or_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_or_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1476,6 +1638,12 @@ v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x33,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + 
v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1518,6 +1686,12 @@ v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x04,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1560,6 +1734,12 @@ v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1709,6 +1889,12 @@ v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x34,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1751,6 +1937,12 @@ v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+v_subrev_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x05,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1793,6 +1985,12 @@ v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1835,6 +2033,12 @@ v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_xnor_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1877,6 +2081,12 @@ v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_xor_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s index dc151d66..b18029d 100644 --- 
a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s @@ -45,6 +45,12 @@ v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0 v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -57,6 +63,12 @@ v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -69,6 +81,12 @@ v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -78,6 +96,12 @@ 
v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_and_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -87,6 +111,12 @@ v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_ashrrev_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -97,14 +127,30 @@ v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] // W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa4,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xac,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -117,10 +163,22 @@ v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x18,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa0,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, 10, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x14,0xa1,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -135,6 +193,12 @@ v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] @@ -147,6 +211,12 @@ v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] @@ -159,9 +229,18 @@ v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_ldexp_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +v_ldexp_f16_e64_dpp v5, v1, s2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, 2.0 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0xe8,0x01,0x08,0x01,0x77,0x39,0x05] + v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] @@ -171,6 +250,12 @@ v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_lshlrev_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -180,6 +265,12 @@ v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_lshrrev_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -189,6 +280,12 @@ v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x31,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -201,6 +298,12 @@ v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -213,6 +316,12 @@ v_max_num_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -222,6 +331,12 @@ v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x14,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -231,6 +346,12 @@ v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -243,6 +364,12 @@ v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_num_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -255,6 +382,12 @@ v_min_num_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: 
[0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -264,6 +397,12 @@ v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -273,6 +412,12 @@ v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_dx9_zero_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -285,6 +430,12 @@ v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0, v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x35,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -297,6 +448,12 @@ v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 
dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x08,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -309,6 +466,12 @@ v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_hi_i32_i24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -318,6 +481,12 @@ v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_hi_u32_u24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -327,6 +496,12 @@ v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_i32_i24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -336,6 +511,12 @@ v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_legacy_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -348,6 +529,12 @@ v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0, v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_u32_u24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -357,6 +544,12 @@ v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_or_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// 
GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -405,6 +598,12 @@ v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0 v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x33,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -417,6 +616,12 @@ v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -429,6 +634,12 @@ v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x26,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -477,6 +688,12 @@ v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0, v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x34,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -489,6 +706,12 @@ v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] f v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -501,6 +724,12 @@ v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] f v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_nc_u32_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -510,6 +739,12 @@ v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_xnor_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -519,6 +754,12 @@ v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_xor_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s index 8b2565c..245ca5f 100644 --- a/llvm/test/MC/AMDGPU/gfx12_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_err.s @@ -127,3 +127,19 @@ s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 24-bit signed offset // GFX12-ERR: s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: ^ + +v_cmp_le_f32 vcc_lo, v1, s2 row_mirror +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + +v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + 
+v_cmpx_gt_u16 v1, s2 op_sel:[1,1] quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + +v_cmpx_class_f16_u16 v1, 2.0 quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index b2251f5..a15a48e 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -648,8 +648,8 @@ v_mov_b32 v0, s1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 v_and_b32 v0, s42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: not a valid operand. -// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_add_f32 v0, v1, s45 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt index 6ab3e08..52426d3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt @@ -17,3 +17,12 @@ # GFX1150: v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] 0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX1150: v_add_f32_e64_dpp v5, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX1150: v_min3_f16_e64_dpp v5, v1, s2, 2.0 op_sel:[1,1,0,1] 
quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] +0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff + +# GFX1150: v_cmp_le_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt index 1be97b2..1d69134 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt @@ -22,3 +22,7 @@ # This is more strict than the check in vinterp-fake16.txt and is GFX12 specific. # GFX12: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04] 0x00,0x00,0xe0,0xcd,0x01,0x05,0x0e,0x1c + +# Regression test for future fixes to VOPC _e64_dpp src1 +# GFX12: v_cmp_le_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 4303c6d..0771e64 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4,6 +4,12 @@ # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, 15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -101,6 +107,9 @@ # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -185,6 +194,9 @@ # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -227,6 +239,9 @@ # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -311,6 +326,9 @@ # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -437,6 +455,9 @@ # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -479,6 +500,9 @@ # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -521,6 +545,9 @@ # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -660,6 +687,9 @@ # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -702,6 +732,9 @@ # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -744,6 +777,9 @@ # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -786,6 +822,9 @@ # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1104,6 +1143,9 @@ # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1230,6 +1272,9 @@ # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1314,6 +1359,9 @@ # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1356,6 +1404,9 @@ # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1398,6 +1449,9 @@ # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1524,6 +1578,9 @@ # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1566,6 +1623,9 @@ # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1608,6 +1668,9 @@ # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1650,6 +1713,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1668,6 +1734,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, 15, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff @@ -1692,6 +1761,9 @@ # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1818,6 +1890,12 @@ # GFX12: 
v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, -2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1860,6 +1938,9 @@ # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1902,6 +1983,9 @@ # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1944,6 +2028,9 @@ # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2070,6 +2157,9 @@ # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2112,6 +2202,9 @@ # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2154,6 +2247,9 @@ # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2196,6 +2292,9 @@ # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2238,6 +2337,9 @@ # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2280,6 +2382,9 @@ # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2406,6 +2511,9 @@ # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2448,6 +2556,9 @@ # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2490,6 +2601,9 @@ # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2532,6 +2646,9 @@ # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, 
v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2574,6 +2691,9 @@ # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2658,6 +2778,9 @@ # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2700,6 +2823,9 @@ # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2784,6 +2910,9 @@ # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2826,6 +2955,9 @@ # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2868,6 +3000,9 @@ # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2910,6 +3045,9 @@ # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2952,6 +3090,9 @@ # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3146,6 +3287,9 @@ # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3188,6 +3332,9 @@ # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3440,6 +3587,9 @@ # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3482,6 +3632,9 @@ # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3524,6 +3677,9 @@ # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3566,6 +3722,9 @@ # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 
0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3608,6 +3767,9 @@ # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3650,6 +3812,9 @@ # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3692,6 +3857,9 @@ # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3734,6 +3902,9 @@ # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3776,6 +3947,9 @@ # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3818,6 +3992,9 @@ # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3860,6 +4037,9 @@ # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3902,6 +4082,9 @@ # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3944,6 +4127,9 @@ # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3986,6 +4172,9 @@ # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -4028,6 +4217,9 @@ # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -4417,6 +4609,9 @@ # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4459,6 +4654,9 @@ # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4501,6 +4699,9 @@ # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4543,6 +4744,9 @@ # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4585,6 +4789,9 @@ # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4627,6 +4834,9 @@ # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4669,6 +4879,9 @@ # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4711,6 +4924,9 @@ # GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index c73ffe7..a836ada 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -4,6 +4,12 @@ # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x55,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -59,6 +65,9 @@ # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x47,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -101,6 +110,9 @@ # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_alignbit_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x16,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -134,6 +146,9 @@ # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -173,6 +188,9 @@ # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x57,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -221,6 +239,9 @@ # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -257,6 +278,9 @@ # GFX12: 
v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x10,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -293,6 +317,9 @@ # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -354,6 +381,9 @@ # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -390,6 +420,9 @@ # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -426,6 +459,9 @@ # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -462,6 +498,9 @@ # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -582,6 +621,9 @@ # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -642,6 +684,9 @@ # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x13,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -690,6 +735,9 @@ # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x15,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -726,6 +774,9 @@ # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x46,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -762,6 +813,9 @@ # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x56,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -810,6 +864,9 @@ # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -846,6 +903,9 @@ # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x0b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -882,6 +942,9 @@ # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -918,6 +981,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -954,6 +1020,9 @@ # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x1e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1002,6 +1071,9 @@ # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1038,6 +1110,9 @@ # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x69,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1074,6 +1149,9 @@ # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x64,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: 
v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1110,6 +1188,9 @@ # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x62,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1158,6 +1239,9 @@ # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x31,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1194,6 +1278,9 @@ # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x20,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1230,6 +1317,9 @@ # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x21,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1266,6 +1356,9 @@ # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x29,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1302,6 +1395,9 @@ # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 
0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1338,6 +1434,9 @@ # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1386,6 +1485,9 @@ # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1422,6 +1524,9 @@ # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x68,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1458,6 +1563,9 
@@ # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x65,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1494,6 +1602,9 @@ # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x63,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1530,6 +1641,9 @@ # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x39,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1572,6 +1686,9 @@ # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1608,6 +1725,9 @@ # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x58,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1650,6 +1770,9 @@ # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x44,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1686,6 +1809,9 @@ # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x23,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1722,6 +1848,9 @@ # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x24,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1758,6 +1887,9 @@ # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x25,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1794,6 +1926,9 @@ # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x22,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1874,6 +2009,9 @@ # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x45,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1910,6 +2048,9 @@ # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x40,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2006,6 +2147,9 @@ # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2048,6 +2192,12 @@ # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_fma_f16_e64_dpp v5, v1, 4.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2090,6 +2240,9 @@ # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2129,6 +2282,9 @@ # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x5a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2171,6 +2327,9 @@ # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2210,6 +2369,9 @@ # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x59,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2252,6 +2414,9 @@ # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x2c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2294,6 +2459,9 @@ # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2333,6 +2501,9 @@ # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2372,6 +2543,9 @@ # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_num_f16_e64_dpp v5, v1, 
v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2414,6 +2588,9 @@ # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2453,6 +2630,9 @@ # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2492,6 +2672,9 @@ # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2534,6 +2717,9 @@ # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2573,6 +2759,9 @@ # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2752,6 +2941,9 @@ # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 
0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2788,6 +2980,9 @@ # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2824,6 +3019,9 @@ # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximum3_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2860,6 +3058,9 @@ # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimum3_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2896,6 
+3097,9 @@ # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2932,6 +3136,9 @@ # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2968,6 +3175,9 @@ # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -3004,6 +3214,9 @@ # GFX12: 
v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt index 56d7805b..b10b8da 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt @@ -59,6 +59,9 @@ # GFX12: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -101,6 +104,9 @@ # GFX12: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x03,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -143,6 +149,9 @@ # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -185,6 +194,9 @@ # GFX12: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_and_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -227,6 +239,9 @@ # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ashrrev_i32_e64_dpp v5, v1, 
s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -270,6 +285,10 @@ # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff +# W32: v_cndmask_b32_e64_dpp v5, v1, s3, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, s3, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff + # W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff @@ -324,6 +343,9 @@ # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -366,6 +388,9 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -390,6 +415,9 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_ldexp_f16_e64_dpp v5, v1, 2.0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x21,0x01,0xff + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff @@ -408,6 +436,9 @@ # GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshlrev_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # 
GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -450,6 +481,9 @@ # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshrrev_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -492,6 +526,9 @@ # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x31,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -534,6 +571,9 @@ # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x16,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -576,6 +616,9 @@ # GFX12: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_i32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -618,6 +661,9 @@ # GFX12: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -660,6 +706,9 @@ # GFX12: v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f16_e64_dpp v5, v1, s3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x30,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -702,6 +751,9 @@ # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x15,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -744,6 +796,9 @@ # GFX12: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_i32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -786,6 +841,9 @@ # GFX12: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -828,6 +886,9 @@ # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -870,6 +931,9 @@ # GFX12: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -912,6 +976,9 @@ # GFX12: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -954,6 +1021,9 @@ # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -996,6 +1066,9 @@ # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1038,6 +1111,9 
@@ # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_i32_i24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1080,6 +1156,9 @@ # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_u32_u24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1122,6 +1201,9 @@ # GFX12: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_or_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1219,6 +1301,9 @@ # GFX12: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1261,6 +1346,9 @@ # GFX12: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1303,6 +1391,9 @@ # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1400,6 +1491,9 @@ # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1442,6 +1536,9 @@ # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1484,6 +1581,9 @@ # GFX12: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: 
v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1526,6 +1626,9 @@ # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_xnor_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1568,6 +1671,9 @@ # GFX12: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_xor_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt index da7faa8..f78106e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt @@ -23,6 +23,9 @@ # GFX12: v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -35,6 +38,9 @@ # GFX12: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x03,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -47,18 +53,27 @@ # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x25,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: 
v_and_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_ashrrev_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -66,6 +81,10 @@ # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 +# W32: v_cndmask_b32_e64_dpp v5, v1, s3, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, s3, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05 + # W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 @@ -84,6 +103,9 @@ # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 
v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -96,30 +118,48 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x08,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x10,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] 
0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 # GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_lshlrev_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_lshrrev_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x19,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x31,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -132,6 +172,9 @@ # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x16,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -144,18 +187,27 @@ # GFX12: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x14,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f16_e64_dpp v5, v1, s3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -168,6 +220,9 @@ # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x15,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -180,18 +235,27 @@ # GFX12: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x13,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x07,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -204,6 +268,9 @@ # GFX12: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x35,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -216,6 +283,9 @@ # GFX12: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x08,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; 
encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -228,30 +298,45 @@ # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_i32_i24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x09,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 
0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_u32_u24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_or_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -277,6 +362,9 @@ # GFX12: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x33,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -289,6 +377,9 @@ # GFX12: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x04,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -301,6 +392,9 @@ # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -326,6 +420,9 @@ # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x34,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -338,6 +435,9 @@ # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x05,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -350,17 +450,26 @@ # GFX12: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x27,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_xnor_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1e,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_xor_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x1d,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 |