aboutsummaryrefslogtreecommitdiff
path: root/llvm/test
diff options
context:
space:
mode:
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2024-01-04 00:10:15 +0100
committer: GitHub <noreply@github.com> 2024-01-04 00:10:15 +0100
commit: 49b492048af2b2093aaed899c0bbd6d740aad83c (patch)
tree: 1f4f9b2ba68133bd36b607d09abed154b32ee783 /llvm/test
parent: 49029f926d359075d59ad4aec2d01a21d9514b02 (diff)
downloadllvm-49b492048af2b2093aaed899c0bbd6d740aad83c.zip
llvm-49b492048af2b2093aaed899c0bbd6d740aad83c.tar.gz
llvm-49b492048af2b2093aaed899c0bbd6d740aad83c.tar.bz2
AMDGPU: Fix packed 16-bit inline constants (#76522)
Consistently treat packed 16-bit operands as 32-bit values, because that's really what they are. The attempt to treat them differently was ultimately incorrect and led to miscompiles, e.g. when using non-splat constants such as (1, 0) as operands. Recognize 32-bit float constants for i/u16 instructions. This is a bit odd conceptually, but it matches HW behavior and SP3. Remove isFoldableLiteralV216; there was too much magic in the dependency between it and its use in SIFoldOperands. Instead, we now simply rely on checking whether a constant is an inline constant, and trying a bunch of permutations of the low and high halves. This is more obviously correct and leads to some new cases where inline constants are used, as shown by tests. Move the logic for switching packed add vs. sub into SIFoldOperands. This has two benefits: all logic that optimizes for inline constants in packed math is now in one place; and it applies to both SelectionDAG and GISel paths. Disable the use of opsel with v_dot* instructions on gfx11. They are documented to ignore opsel on src0 and src1. It may be interesting to re-enable the use of opsel on src2 as a future optimization. A similar "proper" fix of what inline constants mean could potentially be applied to unpacked 16-bit ops. However, it's less clear what the benefit would be, and there are surely places where we'd have to carefully audit whether values are properly sign- or zero-extended. It is best to keep such a change separate. Fixes: Corruption in FSR 2.0 (latent bug exposed by an LLPC change)
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll67
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/add.v2i16.ll73
-rw-r--r--llvm/test/CodeGen/AMDGPU/calling-conventions.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/fma.f16.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll145
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptosi.f16.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/immv216.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll813
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll29
-rw-r--r--llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll1055
-rw-r--r--llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/sub.v2i16.ll13
-rw-r--r--llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll46
-rw-r--r--llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s59
-rw-r--r--llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s57
-rw-r--r--llvm/test/MC/AMDGPU/literalv216.s20
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt2
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt56
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt56
-rw-r--r--llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt120
24 files changed, 1198 insertions, 1533 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
index e4cabab..496ee9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -172,8 +172,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX9-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v1, 0xffc0ffc0
-; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 64 op_sel_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
@@ -188,7 +187,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX10-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v0, 0xffc0, v0 op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 64 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %a, <i16 -64, i16 -64>
ret <2 x i16> %add
@@ -609,3 +608,65 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
%cast = bitcast <2 x i16> %add to i32
ret i32 %cast
}
+
+define <2 x i16> @add_inline_imm_neg1_0(<2 x i16> %x) {
+; GFX7-LABEL: add_inline_imm_neg1_0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, -1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_neg1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: add_inline_imm_neg1_0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_add_u16_e32 v0, -1, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_neg1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 -1, i16 0>
+ ret <2 x i16> %y
+}
+
+define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
+; GFX7-LABEL: add_inline_imm_1_0:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: add_inline_imm_1_0:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX8-NEXT: v_add_u16_e32 v0, 1, v0
+; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 1, i16 0>
+ ret <2 x i16> %y
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
index aa7aa6b..5613501 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -156,13 +156,13 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0xffc0 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0xffc0 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX11-NEXT: s_setpc_b64 s[30:31]
%sub = sub <2 x i16> %a, <i16 -64, i16 -64>
ret <2 x i16> %sub
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index b90d68a..7cf58a2 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -437,7 +437,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v0, v0, -1
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -449,7 +449,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, -1
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -460,7 +460,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, -1
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -566,8 +566,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_mov_b32 s2, 1.0
-; GFX9-NEXT: v_pk_add_u16 v0, v0, s2
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX9-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-NEXT: s_endpgm
;
@@ -579,7 +578,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v0, v0, s[2:3] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_add_u16 v0, 0x3f80, v0 op_sel:[1,0] op_sel_hi:[0,1]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -590,7 +589,7 @@ define amdgpu_kernel void @v_test_add_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_add_u16 v0, 0x3f80, v0 op_sel:[1,0] op_sel_hi:[0,1]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1.0
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -990,6 +989,66 @@ define amdgpu_kernel void @v_test_add_v2i16_sext_to_v2i64(ptr addrspace(1) %out,
ret void
}
+define <2 x i16> @add_inline_imm_neg1_0(<2 x i16> %x) {
+; VI-LABEL: add_inline_imm_neg1_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; VI-NEXT: v_add_u16_e32 v0, -1, v0
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_neg1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_neg1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: add_inline_imm_neg1_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v0, v0, 1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 -1, i16 0>
+ ret <2 x i16> %y
+}
+
+define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
+; VI-LABEL: add_inline_imm_1_0:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; VI-NEXT: v_add_u16_e32 v0, 1, v0
+; VI-NEXT: v_or_b32_e32 v0, v0, v1
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_inline_imm_1_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: add_inline_imm_1_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: add_inline_imm_1_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %y = add <2 x i16> %x, <i16 1, i16 0>
+ ret <2 x i16> %y
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #0
attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index cb89841b..d63ebde 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -431,7 +431,7 @@ define amdgpu_ps void @ps_mesa_v2i16(<2 x i16> %arg0) {
;
; GFX11-LABEL: ps_mesa_v2i16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -468,7 +468,7 @@ define amdgpu_ps void @ps_mesa_inreg_v2i16(<2 x i16> inreg %arg0) {
;
; GFX11-LABEL: ps_mesa_inreg_v2i16:
; GFX11: ; %bb.0:
-; GFX11-NEXT: v_pk_sub_u16 v0, s0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, s0, 1 op_sel_hi:[1,0]
; GFX11-NEXT: global_store_b32 v[0:1], v0, off
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
index 329f0a2..dfc8361 100644
--- a/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll
@@ -597,7 +597,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: ds_read_u16_d16_hi v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GCN-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GCN-NEXT: ds_read_u16_d16 v1, v0 offset:2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, v1
@@ -608,7 +608,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: ds_read_u16_d16_hi v1, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX10-NEXT: ds_read_u16_d16 v1, v0 offset:2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, v1
@@ -619,7 +619,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep(ptr addrspace(3) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: ds_load_u16_d16_hi v1, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX11-NEXT: ds_load_u16_d16 v1, v0 offset:2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
@@ -643,7 +643,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX900-NEXT: ds_read_u16_d16_hi v0, v0
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -654,7 +654,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; FLATSCR-NEXT: ds_read_u16_d16_hi v0, v0
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v1, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -664,7 +664,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX10-NEXT: ds_read_u16 v1, v0 offset:2
; GFX10-NEXT: ds_read_u16_d16_hi v0, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -674,7 +674,7 @@ define <2 x i16> @chain_hi_to_lo_group_other_dep_multi_chain(ptr addrspace(3) %p
; GFX11-NEXT: ds_load_u16 v1, v0 offset:2
; GFX11-NEXT: ds_load_u16_d16_hi v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v1, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -694,7 +694,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX900-NEXT: s_waitcnt vmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX900-NEXT: buffer_load_short_d16 v1, v0, s[0:3], 0 offen offset:2
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: v_mov_b32_e32 v0, v1
@@ -705,7 +705,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: scratch_load_short_d16 v1, v0, off offset:2
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: v_mov_b32_e32 v0, v1
@@ -716,7 +716,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_DEFAULT-NEXT: buffer_load_short_d16_hi v1, v0, s[0:3], 0 offen
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
-; GFX10_DEFAULT-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX10_DEFAULT-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX10_DEFAULT-NEXT: buffer_load_short_d16 v1, v0, s[0:3], 0 offen offset:2
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0)
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v0, v1
@@ -727,7 +727,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FLATSCR_GFX10-NEXT: scratch_load_short_d16_hi v1, v0, off
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
-; FLATSCR_GFX10-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; FLATSCR_GFX10-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; FLATSCR_GFX10-NEXT: scratch_load_short_d16 v1, v0, off offset:2
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0)
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, v1
@@ -738,7 +738,7 @@ define <2 x i16> @chain_hi_to_lo_private_other_dep(ptr addrspace(5) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_d16_hi_b16 v1, v0, off
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v1, v1, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 12 op_sel_hi:[1,0]
; GFX11-NEXT: scratch_load_d16_b16 v1, v0, off offset:2
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
@@ -762,7 +762,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX900-NEXT: global_load_short_d16_hi v0, v[0:1], off glc
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xffff
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -774,7 +774,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; FLATSCR-NEXT: global_load_short_d16_hi v0, v[0:1], off glc
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v2, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -785,7 +785,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_load_short_d16_hi v0, v[0:1], off glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -796,7 +796,7 @@ define <2 x i16> @chain_hi_to_lo_global_other_dep(ptr addrspace(1) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_load_d16_hi_b16 v0, v[0:1], off glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -820,7 +820,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX900-NEXT: s_waitcnt vmcnt(0)
; GFX900-NEXT: s_mov_b32 s4, 0xffff
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX900-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
@@ -833,7 +833,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; FLATSCR-NEXT: s_waitcnt vmcnt(0)
; FLATSCR-NEXT: s_mov_b32 s0, 0xffff
; FLATSCR-NEXT: s_waitcnt lgkmcnt(0)
-; FLATSCR-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; FLATSCR-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; FLATSCR-NEXT: v_bfi_b32 v0, s0, v2, v0
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
;
@@ -846,7 +846,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: flat_load_short_d16_hi v0, v[0:1] glc dlc
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX10-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
@@ -857,7 +857,7 @@ define <2 x i16> @chain_hi_to_lo_flat_other_dep(ptr addrspace(0) %ptr) {
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: flat_load_d16_hi_b16 v0, v[0:1] glc dlc
; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_sub_u16 v0, v0, -12 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 12 op_sel_hi:[1,0]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_bfi_b32 v0, 0xffff, v2, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index 7894f6b..e12de1d 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -255,8 +255,8 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
; GFX10-GISEL: ; %bb.0: ; %bb
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
-; GFX10-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e, v0 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e, v0, v1 op_sel_hi:[0,1,1]
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
+; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
; GFX10-GISEL-NEXT: v_cmp_gt_f16_e64 s4, 0, v0
@@ -288,9 +288,9 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) {
; GFX11-GISEL: ; %bb.0: ; %bb
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e
-; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e, v0 op_sel_hi:[0,1]
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e, v0, v1 op_sel_hi:[0,1,1]
+; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1
; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2
; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4)
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 0ff5ea6..3e658c6 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -77,11 +77,29 @@ define <2 x half> @v_mul_42_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_42_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_42_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_42_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x51405140, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_42_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x5140, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_42_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x51405140, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half 42.0, half 42.0>
ret <2 x half> %mul
}
@@ -3192,11 +3210,29 @@ define <2 x half> @v_mul_16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half 16.0, half 16.0>
ret <2 x half> %mul
}
@@ -3216,11 +3252,29 @@ define <2 x half> @v_mul_neg16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_neg16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_neg16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_neg16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0xcc00cc00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_neg16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0xcc00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_neg16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0xcc00cc00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul <2 x half> %x, <half -16.0, half -16.0>
ret <2 x half> %mul
}
@@ -3242,12 +3296,33 @@ define <2 x half> @v_mul_fabs_16_v2f16(<2 x half> %x) {
; GFX9-GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_mul_fabs_16_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
-; GFX1011-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_mul_fabs_16_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX10-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_16_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX10-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_16_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-SDAG-NEXT: v_pk_mul_f16 v0, 0x4c00, v0 op_sel_hi:[0,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_16_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GFX11-GISEL-NEXT: v_pk_mul_f16 v0, 0x4c004c00, v0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%x.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%mul = fmul <2 x half> %x.fabs, <half 16.0, half 16.0>
ret <2 x half> %mul
@@ -3268,11 +3343,29 @@ define <2 x half> @v_fma_mul_add_32_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX9-GISEL-NEXT: v_pk_fma_f16 v0, v0, v2, v1
; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1011-LABEL: v_fma_mul_add_32_v2f16:
-; GFX1011: ; %bb.0:
-; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX1011-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
-; GFX1011-NEXT: s_setpc_b64 s[30:31]
+; GFX10-SDAG-LABEL: v_fma_mul_add_32_v2f16:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_fma_mul_add_32_v2f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_pk_fma_f16 v0, 0x50005000, v0, v1
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_fma_mul_add_32_v2f16:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, 0x5000, v0, v1 op_sel_hi:[0,1,1]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_fma_mul_add_32_v2f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x50005000, v0, v1
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = fmul contract <2 x half> %x, <half 32.0, half 32.0>
%fma = fadd contract <2 x half> %mul, %y
ret <2 x half> %fma
diff --git a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
index 3afcc7d..afb3a02 100644
--- a/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptosi.f16.ll
@@ -480,7 +480,7 @@ define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) {
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: v_cmp_eq_f16_e64 s[4:5], 0xbc00, s4
+; VI-NEXT: v_cmp_eq_f16_e64 s[4:5], -1.0, s4
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
@@ -492,7 +492,7 @@ define amdgpu_kernel void @fptosi_f16_to_i1(ptr addrspace(1) %out, half %in) {
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_cmp_eq_f16_e64 s2, 0xbc00, s2
+; GFX11-NEXT: v_cmp_eq_f16_e64 s2, -1.0, s2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
; GFX11-NEXT: s_mov_b32 s2, -1
diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll
index 8c33004..b66ca71 100644
--- a/llvm/test/CodeGen/AMDGPU/immv216.ll
+++ b/llvm/test/CodeGen/AMDGPU/immv216.ll
@@ -580,7 +580,7 @@ define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x h
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x38003800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x38003800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x38]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
ret <2 x i16> %y
@@ -590,7 +590,7 @@ define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xb800b800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xb800b800, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0x00,0xb8]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
ret <2 x i16> %y
@@ -600,7 +600,7 @@ define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x3c003c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x3c003c00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x3c]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
ret <2 x i16> %y
@@ -610,27 +610,25 @@ define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xbc00bc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xbc00bc00, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0x00,0xbc]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x40004000
-; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
+; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1]
-; GFX10: v_pk_lshlrev_b16 v0, v0, 0x4000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x40,0x00,0x00]
+; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xe9,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
ret <2 x i16> %y
}
; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
-; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc000c000
-; GFX9: v_pk_lshlrev_b16 v0, v0, [[K]]
+; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1]
-; GFX10: v_pk_lshlrev_b16 v0, v0, 0xc000 op_sel_hi:[1,0] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc0,0x00,0x00]
+; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xeb,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
%y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
ret <2 x i16> %y
@@ -640,7 +638,7 @@ define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x44004400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x44004400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x44]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
ret <2 x i16> %y
@@ -651,7 +649,7 @@ define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0xc400c400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0xc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0xc400c400, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0x00,0xc4]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
ret <2 x i16> %y
@@ -661,7 +659,7 @@ define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
; GFX9: s_mov_b32 [[K:s[0-9]+]], 0x31183118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]]
-; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
+; GFX10: v_pk_mul_lo_u16 v0, 0x31183118, v0 ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x18,0x31]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
%y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
ret <2 x i16> %y
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index e2a3749..8874240 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -473,89 +473,47 @@ define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_imad_pat_v2i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_imad_pat_v2i16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_imad_pat_v2i16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y18, %y
@@ -733,18 +691,18 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -775,18 +733,18 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -817,8 +775,8 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_imad_pat_v3i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -828,11 +786,11 @@ define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -1130,18 +1088,18 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1172,18 +1130,18 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1214,8 +1172,8 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_imad_pat_v4i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -1225,11 +1183,11 @@ define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -1555,89 +1513,47 @@ define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_umad_pat_v2i16:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_v2i16:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_umad_pat_v2i16:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_v2i16:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_umad_pat_v2i16:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y18 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y18, %y
@@ -1815,18 +1731,18 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1857,18 +1773,18 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -1899,8 +1815,8 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_umad_pat_v3i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -1910,11 +1826,11 @@ define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -2212,18 +2128,18 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX9-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX9-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -2254,18 +2170,18 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX10-SDAG: ; %bb.0: ; %entry
; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX10-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
@@ -2296,8 +2212,8 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-LABEL: clpeak_umad_pat_v4i16:
; GFX11-SDAG: ; %bb.0: ; %entry
; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
@@ -2307,11 +2223,11 @@ define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v5, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v3, v4, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v4, v1, 1 op_sel_hi:[1,0]
+; GFX11-SDAG-NEXT: v_pk_add_u16 v5, v0, 1 op_sel_hi:[1,0]
; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
@@ -7277,143 +7193,74 @@ define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX10-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_imad_pat_v2i16_x2:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y38 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y38, %y
@@ -7654,143 +7501,74 @@ define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX9-GISEL: ; %bb.0: ; %entry
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX10-SDAG: ; %bb.0: ; %entry
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX10-GISEL: ; %bb.0: ; %entry
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX9: ; %bb.0: ; %entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX9-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX9-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX11-SDAG: ; %bb.0: ; %entry
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX10-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX10-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX10-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX10-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
-; GFX11-GISEL: ; %bb.0: ; %entry
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
-; GFX11-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX11-LABEL: clpeak_umad_pat_v2i16_x2:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v0, v2, v0
+; GFX11-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v1, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_add_u16 v2, v1, v2
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v2, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v2, v0, v1
+; GFX11-NEXT: v_pk_add_u16 v1, v2, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v1, v0
+; GFX11-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v1
+; GFX11-NEXT: v_pk_mul_lo_u16 v0, v0, v2
+; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%y38 = add <2 x i16> %x, <i16 1, i16 1>
%add = mul <2 x i16> %y38, %y
@@ -8373,6 +8151,24 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: mul_u24_add64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v0
+; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v4, v[2:3]
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: mul_u24_add64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v1
+; GFX11-GISEL-NEXT: v_mul_hi_u32_u24_e32 v1, v0, v1
+; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v4, v2
+; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
%mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
%add = add i64 %mul, %z
ret i64 %add
@@ -8410,6 +8206,15 @@ define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: mul_u24_zext_add64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_mul_u32_u24_e32 v0, v0, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
%mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
%mul.zext = zext i32 %mul to i64
%add = add i64 %mul.zext, %z
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
index 54bd78e..66f159f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
@@ -75,26 +75,15 @@ entry:
; Make sure we do not violate constant bus restriction with 3 scalar inputs and seemingly inlinable literal.
define amdgpu_ps void @test_llvm_amdgcn_fdot2_bf16_bf16_sis(
-; SDAG-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
-; SDAG-GFX11: ; %bb.0: ; %entry
-; SDAG-GFX11-NEXT: v_mov_b32_e32 v2, s1
-; SDAG-GFX11-NEXT: s_mov_b32 s1, 0x10001
-; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; SDAG-GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, s1, v2
-; SDAG-GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; SDAG-GFX11-NEXT: s_nop 0
-; SDAG-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; SDAG-GFX11-NEXT: s_endpgm
-;
-; GISEL-GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
-; GISEL-GFX11: ; %bb.0: ; %entry
-; GISEL-GFX11-NEXT: v_mov_b32_e32 v2, 0x10001
-; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GISEL-GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, v2, s1
-; GISEL-GFX11-NEXT: global_store_b16 v[0:1], v2, off
-; GISEL-GFX11-NEXT: s_nop 0
-; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GISEL-GFX11-NEXT: s_endpgm
+; GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16_sis:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: v_mov_b32_e32 v2, s1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_dot2_bf16_bf16 v2, s0, 0x10001, v2
+; GFX11-NEXT: global_store_b16 v[0:1], v2, off
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
ptr addrspace(1) %r,
<2 x i16> inreg %a,
i16 inreg %c) {
diff --git a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
index 81918f5..e96570d 100644
--- a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
@@ -23,7 +23,7 @@ bb:
%tmp1 = zext i32 %tmp to i64
%tmp2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %arg, i64 %tmp1
%tmp3 = load <2 x half>, ptr addrspace(1) %tmp2, align 4
- %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
+ %tmp4 = tail call <2 x half> @llvm.maxnum.v2f16(<2 x half> %tmp3, <2 x half> <half 0xH3C00, half 0xH0000>)
store <2 x half> %tmp4, ptr addrspace(1) %tmp2, align 4
ret void
}
@@ -96,7 +96,7 @@ bb:
; GCN-LABEL: {{^}}test_pk_max_f16_literal_0_41c8:
; GFX9: s_mov_b32 [[C:s[0-9]+]], 0x41c80000
; GFX9: v_pk_max_f16 v{{[0-9]+}}, v{{[0-9]+}}, [[C]]{{$}}
-; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c8, v{{[0-9]+}} op_sel:[1,0] op_sel_hi:[0,1]{{$}}
+; GFX10: v_pk_max_f16 v{{[0-9]+}}, 0x41c80000, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_pk_max_f16_literal_0_41c8(ptr addrspace(1) nocapture %arg) {
bb:
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 536b2d0..3c654e9 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -1622,14 +1622,14 @@ define <2 x i16> @v_mul_add_1_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_1_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 1, i16 1>
@@ -1665,14 +1665,14 @@ define <2 x i16> @v_mul_add_1_v2i16_commute(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_1_v2i16_commute:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_1_v2i16_commute:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v1, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 1, i16 1>
@@ -1886,14 +1886,14 @@ define <2 x i16> @v_mul_add_2_v2i16(<2 x i16> %x, <2 x i16> %y) {
; GFX9-LABEL: v_mul_add_2_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_add_2_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_pk_sub_u16 v1, v1, -2 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 2 op_sel_hi:[1,0]
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, v1
; GFX10-NEXT: s_setpc_b64 s[30:31]
%add = add <2 x i16> %y, <i16 2, i16 2>
@@ -2929,14 +2929,14 @@ define <2 x i16> @v_mul_5_add_1_v2i16(<2 x i16> %arg) {
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
-; GFX9-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_mul_5_add_1_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_mul_lo_u16 v0, v0, 5 op_sel_hi:[1,0]
-; GFX10-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
%mul = mul <2 x i16> %arg, <i16 5, i16 5>
%add = add <2 x i16> %mul, <i16 1, i16 1>
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index a8ae8c0..73f2834 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -2399,7 +2399,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -2410,7 +2410,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_1_0(ptr addrspace(1) %out, ptr a
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0xc4000000
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2534,7 +2534,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v1, v1, 0x44000000
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-NEXT: s_endpgm
;
@@ -2545,7 +2545,7 @@ define amdgpu_kernel void @v_test_v2i16_x_sub_0_neg1_0(ptr addrspace(1) %out, pt
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x4400 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v1, v1, 0x44000000
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -2645,76 +2645,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe0ffe0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg32_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -2803,76 +2767,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg32(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe00000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_0_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_0_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -2963,76 +2891,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_0(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg32_0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg32_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3128,75 +3020,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_neg16(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg16_neg16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg16_neg16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, -16 op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3285,75 +3142,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_0_neg16(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_0_neg16:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_0_neg16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3444,75 +3266,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg16_0(ptr addrspace(1) %out, ptr
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 16
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg16_0:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, v1, -16
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg16_0:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 16
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3613,9 +3400,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x3c003c00
+; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xc400c400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3631,53 +3418,29 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fpone(ptr addrspace(1) %out, p
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x3c00 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc400, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
-;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x3c00 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fpone:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc400, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_fpone:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, 0xc400c400, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3778,9 +3541,9 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xbc00bc00
+; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x44004400
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
+; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v1, s2
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-SDAG-NEXT: s_endpgm
;
@@ -3796,53 +3559,29 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfpone(ptr addrspace(1) %out
; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xbc00 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
-;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xbc00 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfpone:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x4400, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_negfpone:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, 0x44004400, v1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -3937,77 +3676,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_fptwo(ptr addrspace(1) %out, p
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0xc000c000
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x40004000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xc000 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0x4000, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0xc000 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_fptwo:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0x4000, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_fptwo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, 2.0 op_sel:[0,1]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4102,77 +3804,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg_negfptwo(ptr addrspace(1) %out
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_mov_b32 s2, 0x40004000
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, s2
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xc000c000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x4000 op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xc000, v1 op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 0x4000 op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_neg_negfptwo:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xc000, v1 op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_neg_negfptwo:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v1, v1, -2.0 op_sel:[0,1]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4260,76 +3925,40 @@ define amdgpu_kernel void @v_test_v2i16_x_add_undef_neg32(ptr addrspace(1) %out,
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-SDAG-NEXT: s_endpgm
-;
-; GFX9-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffe00000
-; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, v2
-; GFX9-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX9-GISEL-NEXT: s_endpgm
-;
-; GFX10-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SDAG-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX10-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-SDAG-NEXT: s_endpgm
-;
-; GFX10-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX10-GISEL-NEXT: s_endpgm
+; GFX9-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT: s_endpgm
;
-; GFX11-SDAG-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SDAG-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
-; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-SDAG-NEXT: s_nop 0
-; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-SDAG-NEXT: s_endpgm
+; GFX10-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v1, v0, s[2:3]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: s_endpgm
;
-; GFX11-GISEL-LABEL: v_test_v2i16_x_add_undef_neg32:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
-; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel:[1,0] op_sel_hi:[0,1]
-; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX11-GISEL-NEXT: s_nop 0
-; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
-; GFX11-GISEL-NEXT: s_endpgm
+; GFX11-LABEL: v_test_v2i16_x_add_undef_neg32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: global_load_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_pk_sub_u16 v1, v1, 32 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: s_nop 0
+; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %in, i64 %tid.ext
@@ -4455,7 +4084,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: global_load_dword v1, v0, s[2:3]
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
+; GFX10-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1
; GFX10-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10-GISEL-NEXT: s_endpgm
;
@@ -4479,7 +4108,7 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: global_load_b32 v1, v0, s[2:3]
; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffe0, v1 op_sel_hi:[0,1]
+; GFX11-GISEL-NEXT: v_pk_add_u16 v1, 0xffffffe0, v1
; GFX11-GISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index 9a6851c..b237703 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -6,7 +6,7 @@
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
; CIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; CIVI: s_sub_i32
@@ -30,7 +30,7 @@ define amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0
; GFX9: global_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
; VI-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
; VI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16,
@@ -70,7 +70,7 @@ define amdgpu_kernel void @v_abs_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -88,7 +88,7 @@ define amdgpu_kernel void @s_abs_v2i16_2(ptr addrspace(1) %out, <2 x i16> %val)
; GFX9: global_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_sub_i16 [[SUB:v[0-9]+]], 0, [[VAL]]
; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]]
-; GFX9: v_pk_sub_u16 [[ADD:v[0-9]+]], [[MAX]], -2 op_sel_hi:[1,0]
+; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
%z0 = insertelement <2 x i16> undef, i16 0, i16 0
%z1 = insertelement <2 x i16> %z0, i16 0, i16 1
@@ -111,8 +111,8 @@ define amdgpu_kernel void @v_abs_v2i16_2(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, s[[#LOAD + 3]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], s[[#LOAD + 2]], [[SUB0]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], s[[#LOAD + 3]], [[SUB1]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
-; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
@@ -135,11 +135,11 @@ define amdgpu_kernel void @s_abs_v4i16(ptr addrspace(1) %out, <4 x i16> %val) #0
; GFX9-DAG: v_pk_sub_i16 [[SUB0:v[0-9]+]], 0, v[[VAL0]]
; GFX9-DAG: v_pk_max_i16 [[MAX0:v[0-9]+]], v[[VAL0]], [[SUB0]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD0:v[0-9]+]], [[MAX0]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD0:v[0-9]+]], [[MAX0]], 2 op_sel_hi:[1,0]
; GFX9-DAG: v_pk_sub_i16 [[SUB1:v[0-9]+]], 0, v[[VAL1]]
; GFX9-DAG: v_pk_max_i16 [[MAX1:v[0-9]+]], v[[VAL1]], [[SUB1]]
-; GFX9-DAG: v_pk_sub_u16 [[ADD1:v[0-9]+]], [[MAX1]], -2 op_sel_hi:[1,0]
+; GFX9-DAG: v_pk_add_u16 [[ADD1:v[0-9]+]], [[MAX1]], 2 op_sel_hi:[1,0]
define amdgpu_kernel void @v_abs_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %src) #0 {
%z0 = insertelement <4 x i16> undef, i16 0, i16 0
%z1 = insertelement <4 x i16> %z0, i16 0, i16 1
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index aedf06d..a2712ec 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -427,7 +427,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -460,7 +460,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-NEXT: s_mov_b32 s2, -1
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX10-NEXT: s_endpgm
;
@@ -473,7 +473,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_neg1(ptr addrspace(1) %out, p
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, -1 op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
@@ -562,13 +562,12 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT: s_mov_b32 s4, 1.0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v0, v0, s[2:3] glc
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
-; GFX9-NEXT: v_pk_sub_i16 v0, v0, s4
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
;
@@ -600,7 +599,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-NEXT: s_mov_b32 s2, -1
-; GFX10-NEXT: v_pk_sub_i16 v0, v0, 0x3f80 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX10-NEXT: s_endpgm
;
@@ -613,7 +612,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_inline_fp_split(ptr addrspace(1) %ou
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
-; GFX11-NEXT: v_pk_sub_i16 v0, v0, 0x3f80 op_sel:[0,1] op_sel_hi:[1,0]
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, 1.0
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index e46992c..819e5e8 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -369,13 +369,13 @@ define <2 x i16> @vec_smax_smin(<2 x i16> %src) {
; SDAG-GFX9-NEXT: v_pk_min_i16 v0, v0, s4 op_sel_hi:[1,0]
; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: vec_smax_smin:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: vec_smax_smin:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: vec_smax_smin:
; GISEL-VI: ; %bb.0:
@@ -396,6 +396,14 @@ define <2 x i16> @vec_smax_smin(<2 x i16> %src) {
; GISEL-GFX9-NEXT: v_mov_b32_e32 v1, 0xff00ff
; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: vec_smax_smin:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src.max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src, <2 x i16> <i16 0, i16 0>)
%src.clamp = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src.max, <2 x i16> <i16 255, i16 255>)
ret <2 x i16> %src.clamp
@@ -548,13 +556,13 @@ define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
; SDAG-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
; SDAG-GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-LABEL: vec_smin_smax:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_pk_max_i16 v0, v0, 0
-; GFX11-NEXT: s_setpc_b64 s[30:31]
+; SDAG-GFX11-LABEL: vec_smin_smax:
+; SDAG-GFX11: ; %bb.0:
+; SDAG-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX11-NEXT: v_pk_min_i16 v0, 0xff, v0 op_sel_hi:[0,1]
+; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; SDAG-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; SDAG-GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-VI-LABEL: vec_smin_smax:
; GISEL-VI: ; %bb.0:
@@ -575,7 +583,17 @@ define <2 x i16> @vec_smin_smax(<2 x i16> %src) {
; GISEL-GFX9-NEXT: v_pk_min_i16 v0, v0, v1
; GISEL-GFX9-NEXT: v_pk_max_i16 v0, v0, 0
; GISEL-GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-GFX11-LABEL: vec_smin_smax:
+; GISEL-GFX11: ; %bb.0:
+; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX11-NEXT: v_pk_min_i16 v0, 0xff00ff, v0
+; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GISEL-GFX11-NEXT: v_pk_max_i16 v0, v0, 0
+; GISEL-GFX11-NEXT: s_setpc_b64 s[30:31]
%src.min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %src, <2 x i16> <i16 255, i16 255>)
%src.clamp = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %src.min, <2 x i16> <i16 0, i16 0>)
ret <2 x i16> %src.clamp
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX11: {{.*}}
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
index 45a320a..829b0eb 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3p.s
@@ -463,7 +463,7 @@ v_pk_add_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x02,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -477,9 +477,12 @@ v_pk_add_i16 v5, null, exec_lo
v_pk_add_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x02,0xcc,0xc1,0xfe,0x00,0x00]
-v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+v_pk_add_i16 v5, 0x3800, m0 op_sel:[0,0] op_sel_hi:[1,1]
// GFX11: [0x05,0x40,0x02,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX11: [0x05,0x40,0x02,0xcc,0xf0,0xfa,0x00,0x18]
+
v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +511,7 @@ v_pk_add_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0a,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +526,7 @@ v_pk_add_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0a,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0a,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0a,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +556,7 @@ v_pk_ashrrev_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_ashrrev_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x06,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_ashrrev_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +571,7 @@ v_pk_ashrrev_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x06,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x06,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x06,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +646,7 @@ v_pk_lshlrev_b16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshlrev_b16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x04,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshlrev_b16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,7 +661,7 @@ v_pk_lshlrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x04,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x04,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x04,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -688,7 +691,7 @@ v_pk_lshrrev_b16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshrrev_b16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x05,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshrrev_b16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +706,7 @@ v_pk_lshrrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x05,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x05,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x05,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +736,7 @@ v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15
// GFX11: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_i16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX11: [0x05,0x00,0x00,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x00,0x00,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX11: [0x05,0x40,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +751,7 @@ v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX11: [0x05,0x40,0x00,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX11: [0x05,0x48,0x00,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX11: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +781,7 @@ v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15
// GFX11: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_u16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX11: [0x05,0x00,0x09,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x00,0x09,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX11: [0x05,0x40,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +796,7 @@ v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX11: [0x05,0x40,0x09,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX11: [0x05,0x48,0x09,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX11: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +871,7 @@ v_pk_max_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x07,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +886,7 @@ v_pk_max_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x07,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x07,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x07,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +916,7 @@ v_pk_max_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0c,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +931,7 @@ v_pk_max_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0c,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0c,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0c,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1006,7 @@ v_pk_min_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x08,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1021,7 @@ v_pk_min_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x08,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x08,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x08,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1051,7 @@ v_pk_min_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0d,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1066,7 @@ v_pk_min_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0d,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0d,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0d,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1141,7 @@ v_pk_mul_lo_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_mul_lo_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x01,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_mul_lo_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1156,7 @@ v_pk_mul_lo_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x01,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x01,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x01,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1186,7 @@ v_pk_sub_i16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_i16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x03,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_i16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1201,7 @@ v_pk_sub_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x03,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x03,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x03,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1231,7 @@ v_pk_sub_u16 v5, ttmp15, src_scc
// GFX11: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_u16 v5, m0, 0.5
-// GFX11: [0x05,0x40,0x0b,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_u16 v5, exec_lo, -1
// GFX11: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1246,7 @@ v_pk_sub_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX11: [0x05,0x58,0x0b,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX11: [0x05,0x40,0x0b,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX11: [0x05,0x40,0x0b,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX11: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
index 9a21f7a..a8347fb 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3p.s
@@ -463,7 +463,7 @@ v_pk_add_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x02,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -478,7 +478,7 @@ v_pk_add_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x02,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x02,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x02,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +508,7 @@ v_pk_add_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_add_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0a,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_add_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +523,7 @@ v_pk_add_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0a,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_add_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0a,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0a,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +553,7 @@ v_pk_ashrrev_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_ashrrev_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x06,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_ashrrev_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +568,7 @@ v_pk_ashrrev_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x06,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x06,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x06,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +643,7 @@ v_pk_lshlrev_b16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshlrev_b16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x04,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshlrev_b16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,6 +658,9 @@ v_pk_lshlrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x04,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
+// GFX12: [0x05,0x40,0x04,0xcc,0xf0,0xfa,0x00,0x18]
+
+v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[0,0] op_sel_hi:[1,1]
// GFX12: [0x05,0x40,0x04,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
@@ -688,7 +691,7 @@ v_pk_lshrrev_b16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_lshrrev_b16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x05,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_lshrrev_b16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +706,7 @@ v_pk_lshrrev_b16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x05,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x05,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x05,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +736,7 @@ v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15
// GFX12: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_i16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX12: [0x05,0x00,0x00,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x00,0x00,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX12: [0x05,0x40,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +751,7 @@ v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX12: [0x05,0x40,0x00,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX12: [0x05,0x48,0x00,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX12: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +781,7 @@ v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15
// GFX12: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
v_pk_mad_u16 v5, m0, 0.5, m0 op_sel_hi:[0,0,0]
-// GFX12: [0x05,0x00,0x09,0xcc,0x7d,0xfe,0xf5,0x01,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x00,0x09,0xcc,0x7d,0xe0,0xf5,0x01]
v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,1]
// GFX12: [0x05,0x40,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +796,7 @@ v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[0,0,0] op_sel_hi:[1,1,1]
// GFX12: [0x05,0x40,0x09,0xcc,0xc1,0xfe,0xf4,0x1b]
v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1]
-// GFX12: [0x05,0x48,0x09,0xcc,0xff,0xfa,0xfc,0x13,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1]
// GFX12: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +871,7 @@ v_pk_max_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x07,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +886,7 @@ v_pk_max_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x07,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x07,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x07,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +916,7 @@ v_pk_max_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_max_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0c,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_max_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +931,7 @@ v_pk_max_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0c,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_max_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0c,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0c,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1006,7 @@ v_pk_min_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x08,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1021,7 @@ v_pk_min_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x08,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x08,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x08,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1051,7 @@ v_pk_min_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_min_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0d,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_min_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1066,7 @@ v_pk_min_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0d,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_min_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0d,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0d,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1141,7 @@ v_pk_mul_lo_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_mul_lo_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x01,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_mul_lo_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1156,7 @@ v_pk_mul_lo_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x01,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x01,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x01,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1186,7 @@ v_pk_sub_i16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_i16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x03,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_i16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1201,7 @@ v_pk_sub_i16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x03,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_i16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x03,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x03,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1231,7 @@ v_pk_sub_u16 v5, ttmp15, src_scc
// GFX12: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
v_pk_sub_u16 v5, m0, 0.5
-// GFX12: [0x05,0x40,0x0b,0xcc,0x7d,0xfe,0x01,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
v_pk_sub_u16 v5, exec_lo, -1
// GFX12: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1246,7 @@ v_pk_sub_u16 v5, -1, exec_hi op_sel:[1,1] op_sel_hi:[0,0]
// GFX12: [0x05,0x58,0x0b,0xcc,0xc1,0xfe,0x00,0x00]
v_pk_sub_u16 v5, 0.5, m0 op_sel:[0,0] op_sel_hi:[1,1]
-// GFX12: [0x05,0x40,0x0b,0xcc,0xff,0xfa,0x00,0x18,0x00,0x38,0x00,0x00]
+// GFX12: [0x05,0x40,0x0b,0xcc,0xf0,0xfa,0x00,0x18]
v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1]
// GFX12: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s
index 5b1c7a76..c695bc3 100644
--- a/llvm/test/MC/AMDGPU/literalv216.s
+++ b/llvm/test/MC/AMDGPU/literalv216.s
@@ -113,6 +113,10 @@ v_pk_add_f16 v1, 0x0001, v2
// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x81,0x04,0x02,0x18]
v_pk_add_f16 v1, 0xffff, v2
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_f16 v1, 0xffff, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0xff,0xff,0x00,0x00]
+
+v_pk_add_f16 v1, 0xffffffff, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18]
// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18]
@@ -153,6 +157,10 @@ v_pk_add_f16 v1, 0x3118, v2
// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf8,0x04,0x02,0x18]
v_pk_add_f16 v1, 65535, v2
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_f16 v1, 0xffff, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0xff,0xff,0x00,0x00]
+
+v_pk_add_f16 v1, 4294967295, v2
// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18]
// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18]
@@ -242,7 +250,7 @@ v_pk_add_f16 v5, v1, 0.1234
v_pk_add_u16 v5, v1, 0.1234
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_add_u16 v5, v1, 0x3dfcb924 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x24,0xb9,0xfc,0x3d]
v_pk_fma_f16 v5, 0.1234, v2, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
@@ -258,23 +266,23 @@ v_pk_fma_f16 v5, v1, v2, 0.1234
v_pk_mad_i16 v5, 0.1234, v2, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, 0x2fe6, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, 0x3dfcb924, v2, v3 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0x04,0x0e,0x1c,0x24,0xb9,0xfc,0x3d]
v_pk_mad_i16 v5, v1, 0.1234, v3
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, v1, 0x2fe6, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, v1, 0x3dfcb924, v3 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0xff,0x0d,0x1c,0x24,0xb9,0xfc,0x3d]
v_pk_mad_i16 v5, v1, v2, 0.1234
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
-// GFX10: v_pk_mad_i16 v5, v1, v2, 0x2fe6 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0xe6,0x2f,0x00,0x00]
+// GFX10: v_pk_mad_i16 v5, v1, v2, 0x3dfcb924 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x24,0xb9,0xfc,0x3d]
v_pk_add_f16 v5, v1, 123456.0
// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
// NOGFX10: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
v_pk_add_u16 v5, v1, 123456.0
-// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
-// NOGFX10: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+// NOGFX9: :[[@LINE-1]]:{{[0-9]+}}: error: literal operands are not supported
+// GFX10: v_pk_add_u16 v5, v1, 0x47f12000 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x20,0xf1,0x47]
//===----------------------------------------------------------------------===//
// Packed VOP2
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
index e42d0de..a022c79 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx10_vop3p_literalv216.txt
@@ -79,7 +79,7 @@
# GFX10: v_pk_fma_f16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b]
0x05,0x40,0x0e,0xcc,0xc1,0x84,0x0d,0x1b
-# GFX10: v_pk_mad_i16 v5, 0x3c00, 0x4000, 0x4400 ; encoding: [0x05,0x40,0x00,0xcc,0xff,0xfe,0xfd,0x1b,0x00,0x3c,0x00,0x00]
+# GFX10: v_pk_mad_i16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b]
0x05,0x40,0x00,0xcc,0xf2,0xe8,0xd9,0x1b
# GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
index bc2cb5f..838e6e0 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3p.txt
@@ -466,7 +466,7 @@
# GFX11: v_pk_add_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_add_i16 v5, m0, 0x3800
+# GFX11: v_pk_add_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_add_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -481,7 +481,7 @@
# GFX11: v_pk_add_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_add_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_add_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -511,7 +511,7 @@
# GFX11: v_pk_add_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_add_u16 v5, m0, 0x3800
+# GFX11: v_pk_add_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_add_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -526,7 +526,7 @@
# GFX11: v_pk_add_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_add_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_add_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -556,7 +556,7 @@
# GFX11: v_pk_ashrrev_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_ashrrev_i16 v5, m0, 0x3800
+# GFX11: v_pk_ashrrev_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_ashrrev_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -571,7 +571,7 @@
# GFX11: v_pk_ashrrev_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_ashrrev_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -646,7 +646,7 @@
# GFX11: v_pk_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_lshlrev_b16 v5, m0, 0x3800
+# GFX11: v_pk_lshlrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -661,7 +661,7 @@
# GFX11: v_pk_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -691,7 +691,7 @@
# GFX11: v_pk_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_lshrrev_b16 v5, m0, 0x3800
+# GFX11: v_pk_lshrrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -706,7 +706,7 @@
# GFX11: v_pk_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_lshrrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -736,7 +736,7 @@
# GFX11: v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19
-# GFX11: v_pk_mad_i16 v5, m0, 0x3800, m0
+# GFX11: v_pk_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19
# GFX11: v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -751,7 +751,7 @@
# GFX11: v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX11: v_pk_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX11: v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13
# GFX11: v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -781,7 +781,7 @@
# GFX11: v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19
-# GFX11: v_pk_mad_u16 v5, m0, 0x3800, m0
+# GFX11: v_pk_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19
# GFX11: v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -796,7 +796,7 @@
# GFX11: v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX11: v_pk_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX11: v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13
# GFX11: v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -871,7 +871,7 @@
# GFX11: v_pk_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_max_i16 v5, m0, 0x3800
+# GFX11: v_pk_max_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -886,7 +886,7 @@
# GFX11: v_pk_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_max_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_max_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -916,7 +916,7 @@
# GFX11: v_pk_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_max_u16 v5, m0, 0x3800
+# GFX11: v_pk_max_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -931,7 +931,7 @@
# GFX11: v_pk_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_max_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_max_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1006,7 +1006,7 @@
# GFX11: v_pk_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_min_i16 v5, m0, 0x3800
+# GFX11: v_pk_min_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1021,7 +1021,7 @@
# GFX11: v_pk_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_min_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_min_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1051,7 +1051,7 @@
# GFX11: v_pk_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_min_u16 v5, m0, 0x3800
+# GFX11: v_pk_min_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1066,7 +1066,7 @@
# GFX11: v_pk_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_min_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_min_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1141,7 +1141,7 @@
# GFX11: v_pk_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_mul_lo_u16 v5, m0, 0x3800
+# GFX11: v_pk_mul_lo_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_mul_lo_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1156,7 +1156,7 @@
# GFX11: v_pk_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_mul_lo_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1186,7 +1186,7 @@
# GFX11: v_pk_sub_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_sub_i16 v5, m0, 0x3800
+# GFX11: v_pk_sub_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_sub_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1201,7 +1201,7 @@
# GFX11: v_pk_sub_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_sub_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_sub_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1231,7 +1231,7 @@
# GFX11: v_pk_sub_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18
-# GFX11: v_pk_sub_u16 v5, m0, 0x3800
+# GFX11: v_pk_sub_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18
# GFX11: v_pk_sub_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1246,7 +1246,7 @@
# GFX11: v_pk_sub_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18
-# GFX11: v_pk_sub_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX11: v_pk_sub_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00
# GFX11: v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
index 373cd71..44d8995 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3p.txt
@@ -463,7 +463,7 @@
# GFX12: v_pk_add_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_add_i16 v5, m0, 0x3800
+# GFX12: v_pk_add_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x02,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_add_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x02,0xcc,0x7e,0x82,0x01,0x18]
@@ -478,7 +478,7 @@
# GFX12: v_pk_add_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x02,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_add_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_add_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x02,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_add_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x02,0xcc,0xfd,0xd4,0x00,0x10]
@@ -508,7 +508,7 @@
# GFX12: v_pk_add_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_add_u16 v5, m0, 0x3800
+# GFX12: v_pk_add_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0a,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_add_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0a,0xcc,0x7e,0x82,0x01,0x18]
@@ -523,7 +523,7 @@
# GFX12: v_pk_add_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0a,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_add_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_add_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0a,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_add_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0a,0xcc,0xfd,0xd4,0x00,0x10]
@@ -553,7 +553,7 @@
# GFX12: v_pk_ashrrev_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_ashrrev_i16 v5, m0, 0x3800
+# GFX12: v_pk_ashrrev_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x06,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_ashrrev_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x06,0xcc,0x7e,0x82,0x01,0x18]
@@ -568,7 +568,7 @@
# GFX12: v_pk_ashrrev_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x06,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_ashrrev_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_ashrrev_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x06,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_ashrrev_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x06,0xcc,0xfd,0xd4,0x00,0x10]
@@ -643,7 +643,7 @@
# GFX12: v_pk_lshlrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_lshlrev_b16 v5, m0, 0x3800
+# GFX12: v_pk_lshlrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x04,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_lshlrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x04,0xcc,0x7e,0x82,0x01,0x18]
@@ -658,7 +658,7 @@
# GFX12: v_pk_lshlrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x04,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_lshlrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_lshlrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x04,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_lshlrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x04,0xcc,0xfd,0xd4,0x00,0x10]
@@ -688,7 +688,7 @@
# GFX12: v_pk_lshrrev_b16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_lshrrev_b16 v5, m0, 0x3800
+# GFX12: v_pk_lshrrev_b16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x05,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_lshrrev_b16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x05,0xcc,0x7e,0x82,0x01,0x18]
@@ -703,7 +703,7 @@
# GFX12: v_pk_lshrrev_b16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x05,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_lshrrev_b16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_lshrrev_b16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x05,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_lshrrev_b16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x05,0xcc,0xfd,0xd4,0x00,0x10]
@@ -733,7 +733,7 @@
# GFX12: v_pk_mad_i16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x00,0xcc,0x7b,0xfa,0xed,0x19
-# GFX12: v_pk_mad_i16 v5, m0, 0x3800, m0
+# GFX12: v_pk_mad_i16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x00,0xcc,0x7d,0xe0,0xf5,0x19
# GFX12: v_pk_mad_i16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x00,0xcc,0x7e,0x82,0xad,0x01]
@@ -748,7 +748,7 @@
# GFX12: v_pk_mad_i16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x00,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX12: v_pk_mad_i16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX12: v_pk_mad_i16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x00,0xcc,0xf0,0xfa,0xc0,0x13
# GFX12: v_pk_mad_i16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x00,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -778,7 +778,7 @@
# GFX12: v_pk_mad_u16 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19]
0x05,0x40,0x09,0xcc,0x7b,0xfa,0xed,0x19
-# GFX12: v_pk_mad_u16 v5, m0, 0x3800, m0
+# GFX12: v_pk_mad_u16 v5, m0, 0.5, m0 ; encoding: [0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19]
0x05,0x40,0x09,0xcc,0x7d,0xe0,0xf5,0x19
# GFX12: v_pk_mad_u16 v5, exec_lo, -1, vcc_hi op_sel_hi:[0,0,0] ; encoding: [0x05,0x00,0x09,0xcc,0x7e,0x82,0xad,0x01]
@@ -793,7 +793,7 @@
# GFX12: v_pk_mad_u16 v5, -1, exec_hi, src_scc op_sel:[1,1,1] op_sel_hi:[1,0,0] ; encoding: [0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b]
0x05,0x38,0x09,0xcc,0xc1,0xfe,0xf4,0x0b
-# GFX12: v_pk_mad_u16 v5, 0x3800, m0, 0x3800 op_sel:[1,0,0] op_sel_hi:[0,1,1]
+# GFX12: v_pk_mad_u16 v5, 0.5, m0, 0.5 op_sel:[1,0,0] op_sel_hi:[0,1,1] ; encoding: [0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13]
0x05,0x48,0x09,0xcc,0xf0,0xfa,0xc0,0x13
# GFX12: v_pk_mad_u16 v5, src_scc, vcc_lo, -1 op_sel:[0,1,0] op_sel_hi:[1,0,1] ; encoding: [0x05,0x50,0x09,0xcc,0xfd,0xd4,0x04,0x0b]
@@ -868,7 +868,7 @@
# GFX12: v_pk_max_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_max_i16 v5, m0, 0x3800
+# GFX12: v_pk_max_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x07,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_max_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x07,0xcc,0x7e,0x82,0x01,0x18]
@@ -883,7 +883,7 @@
# GFX12: v_pk_max_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x07,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_max_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_max_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x07,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_max_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x07,0xcc,0xfd,0xd4,0x00,0x10]
@@ -913,7 +913,7 @@
# GFX12: v_pk_max_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_max_u16 v5, m0, 0x3800
+# GFX12: v_pk_max_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0c,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_max_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0c,0xcc,0x7e,0x82,0x01,0x18]
@@ -928,7 +928,7 @@
# GFX12: v_pk_max_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0c,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_max_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_max_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0c,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_max_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0c,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1003,7 +1003,7 @@
# GFX12: v_pk_min_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_min_i16 v5, m0, 0x3800
+# GFX12: v_pk_min_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x08,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_min_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x08,0xcc,0x7e,0x82,0x01,0x18]
@@ -1018,7 +1018,7 @@
# GFX12: v_pk_min_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x08,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_min_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_min_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x08,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_min_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x08,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1048,7 +1048,7 @@
# GFX12: v_pk_min_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_min_u16 v5, m0, 0x3800
+# GFX12: v_pk_min_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0d,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_min_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0d,0xcc,0x7e,0x82,0x01,0x18]
@@ -1063,7 +1063,7 @@
# GFX12: v_pk_min_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0d,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_min_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_min_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0d,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_min_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0d,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1138,7 +1138,7 @@
# GFX12: v_pk_mul_lo_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_mul_lo_u16 v5, m0, 0x3800
+# GFX12: v_pk_mul_lo_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x01,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_mul_lo_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x01,0xcc,0x7e,0x82,0x01,0x18]
@@ -1153,7 +1153,7 @@
# GFX12: v_pk_mul_lo_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x01,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_mul_lo_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_mul_lo_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x01,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_mul_lo_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x01,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1183,7 +1183,7 @@
# GFX12: v_pk_sub_i16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_sub_i16 v5, m0, 0x3800
+# GFX12: v_pk_sub_i16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x03,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_sub_i16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x03,0xcc,0x7e,0x82,0x01,0x18]
@@ -1198,7 +1198,7 @@
# GFX12: v_pk_sub_i16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x03,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_sub_i16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_sub_i16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x03,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_sub_i16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x03,0xcc,0xfd,0xd4,0x00,0x10]
@@ -1228,7 +1228,7 @@
# GFX12: v_pk_sub_u16 v5, ttmp15, src_scc ; encoding: [0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7b,0xfa,0x01,0x18
-# GFX12: v_pk_sub_u16 v5, m0, 0x3800
+# GFX12: v_pk_sub_u16 v5, m0, 0.5 ; encoding: [0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18]
0x05,0x40,0x0b,0xcc,0x7d,0xe0,0x01,0x18
# GFX12: v_pk_sub_u16 v5, exec_lo, -1 ; encoding: [0x05,0x40,0x0b,0xcc,0x7e,0x82,0x01,0x18]
@@ -1243,7 +1243,7 @@
# GFX12: v_pk_sub_u16 v5, -1, exec_hi ; encoding: [0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18]
0x05,0x40,0x0b,0xcc,0xc1,0xfe,0x00,0x18
-# GFX12: v_pk_sub_u16 v5, 0x3800, m0 op_sel:[1,1] op_sel_hi:[0,0]
+# GFX12: v_pk_sub_u16 v5, 0.5, m0 op_sel:[1,1] op_sel_hi:[0,0] ; encoding: [0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00]
0x05,0x58,0x0b,0xcc,0xf0,0xfa,0x00,0x00
# GFX12: v_pk_sub_u16 v5, src_scc, vcc_lo op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x05,0x48,0x0b,0xcc,0xfd,0xd4,0x00,0x10]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
index 215453d..003ece9 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3p.txt
@@ -42,10 +42,10 @@
# CHECK: v_pk_mad_i16 v5, -1, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xc1,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xc1,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_i16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_i16 v5, 0.5, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xf0,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xf0,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_i16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_i16 v5, -4.0, v2, v3 ; encoding: [0x05,0x40,0x80,0xd3,0xf7,0x04,0x0e,0x1c]
0x05,0x40,0x80,0xd3,0xf7,0x04,0x0e,0x1c
# CHECK: v_pk_mad_i16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0f,0x1c]
@@ -84,10 +84,10 @@
# CHECK: v_pk_mad_i16 v5, v1, -1, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x83,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0x83,0x0d,0x1c
-# CHECK: v_pk_mad_i16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_i16 v5, v1, 0.5, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xe1,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0xe1,0x0d,0x1c
-# CHECK: v_pk_mad_i16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_i16 v5, v1, -4.0, v3 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0xef,0x0d,0x1c]
0x05,0x40,0x80,0xd3,0x01,0xef,0x0d,0x1c
# CHECK: v_pk_mad_i16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1f]
@@ -126,10 +126,10 @@
# CHECK: v_pk_mad_i16 v5, v1, v2, -1 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0x06,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0x06,0x1b
-# CHECK: v_pk_mad_i16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_i16 v5, v1, v2, 0.5 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xc2,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0xc2,0x1b
-# CHECK: v_pk_mad_i16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_i16 v5, v1, v2, -4.0 ; encoding: [0x05,0x40,0x80,0xd3,0x01,0x05,0xde,0x1b]
0x05,0x40,0x80,0xd3,0x01,0x05,0xde,0x1b
# CHECK: v_pk_mad_i16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x80,0xd3,0x01,0x05,0x0e,0x1c]
@@ -201,10 +201,10 @@
# CHECK: v_pk_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x81,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x03,0x18]
@@ -243,10 +243,10 @@
# CHECK: v_pk_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_mul_lo_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x81,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x81,0xd3,0x01,0x05,0x02,0x18]
@@ -309,10 +309,10 @@
# CHECK: v_pk_add_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_add_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_add_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x82,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_add_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x03,0x18]
@@ -351,10 +351,10 @@
# CHECK: v_pk_add_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_add_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_add_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x82,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x82,0xd3,0x01,0x05,0x02,0x18]
@@ -420,10 +420,10 @@
# CHECK: v_pk_sub_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_sub_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_sub_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x83,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_sub_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x03,0x18]
@@ -462,10 +462,10 @@
# CHECK: v_pk_sub_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_sub_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_sub_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x83,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x83,0xd3,0x01,0x05,0x02,0x18]
@@ -531,10 +531,10 @@
# CHECK: v_pk_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_lshlrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x84,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x03,0x18]
@@ -573,10 +573,10 @@
# CHECK: v_pk_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_lshlrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_lshlrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshlrev_b16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x84,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x84,0xd3,0x01,0x05,0x02,0x18]
@@ -639,10 +639,10 @@
# CHECK: v_pk_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x85,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x03,0x18]
@@ -681,10 +681,10 @@
# CHECK: v_pk_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_lshrrev_b16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x85,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x85,0xd3,0x01,0x05,0x02,0x18]
@@ -747,10 +747,10 @@
# CHECK: v_pk_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x86,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x03,0x18]
@@ -789,10 +789,10 @@
# CHECK: v_pk_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_ashrrev_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x86,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x86,0xd3,0x01,0x05,0x02,0x18]
@@ -855,10 +855,10 @@
# CHECK: v_pk_max_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_max_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_max_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x87,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_max_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x03,0x18]
@@ -897,10 +897,10 @@
# CHECK: v_pk_max_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_max_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_max_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x87,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x87,0xd3,0x01,0x05,0x02,0x18]
@@ -963,10 +963,10 @@
# CHECK: v_pk_min_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_min_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_i16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_min_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_i16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x88,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_min_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x03,0x18]
@@ -1005,10 +1005,10 @@
# CHECK: v_pk_min_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_i16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_i16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x88,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x88,0xd3,0x01,0x05,0x02,0x18]
@@ -1071,10 +1071,10 @@
# CHECK: v_pk_mad_u16 v5, -1, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xc1,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xc1,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_u16 v5, 0x3800, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_u16 v5, 0.5, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xf0,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xf0,0x04,0x0e,0x1c
-# CHECK: v_pk_mad_u16 v5, 0xc400, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xff,0x04,0x0e,0x1c]
+# CHECK: v_pk_mad_u16 v5, -4.0, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0xf7,0x04,0x0e,0x1c]
0x05,0x40,0x89,0xd3,0xf7,0x04,0x0e,0x1c
# CHECK: v_pk_mad_u16 v5, v1, v255, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0f,0x1c]
@@ -1113,10 +1113,10 @@
# CHECK: v_pk_mad_u16 v5, v1, -1, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x83,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0x83,0x0d,0x1c
-# CHECK: v_pk_mad_u16 v5, v1, 0x3800, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_u16 v5, v1, 0.5, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xe1,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0xe1,0x0d,0x1c
-# CHECK: v_pk_mad_u16 v5, v1, 0xc400, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xff,0x0d,0x1c]
+# CHECK: v_pk_mad_u16 v5, v1, -4.0, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0xef,0x0d,0x1c]
0x05,0x40,0x89,0xd3,0x01,0xef,0x0d,0x1c
# CHECK: v_pk_mad_u16 v5, v1, v2, v255 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1f]
@@ -1155,10 +1155,10 @@
# CHECK: v_pk_mad_u16 v5, v1, v2, -1 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x06,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0x06,0x1b
-# CHECK: v_pk_mad_u16 v5, v1, v2, 0x3800 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_u16 v5, v1, v2, 0.5 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xc2,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0xc2,0x1b
-# CHECK: v_pk_mad_u16 v5, v1, v2, 0xc400 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xfe,0x1b]
+# CHECK: v_pk_mad_u16 v5, v1, v2, -4.0 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0xde,0x1b]
0x05,0x40,0x89,0xd3,0x01,0x05,0xde,0x1b
# CHECK: v_pk_mad_u16 v5, v1, v2, v3 op_sel:[1,0,0] ; encoding: [0x05,0x48,0x89,0xd3,0x01,0x05,0x0e,0x1c]
@@ -1230,10 +1230,10 @@
# CHECK: v_pk_add_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_add_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_add_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_add_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8a,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_add_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x03,0x18]
@@ -1272,10 +1272,10 @@
# CHECK: v_pk_add_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_add_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_add_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_add_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8a,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8a,0xd3,0x01,0x05,0x02,0x18]
@@ -1341,10 +1341,10 @@
# CHECK: v_pk_sub_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_sub_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_sub_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_sub_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8b,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_sub_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x03,0x18]
@@ -1383,10 +1383,10 @@
# CHECK: v_pk_sub_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_sub_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_sub_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_sub_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8b,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8b,0xd3,0x01,0x05,0x02,0x18]
@@ -1452,10 +1452,10 @@
# CHECK: v_pk_max_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_max_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_max_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_max_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8c,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_max_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x03,0x18]
@@ -1494,10 +1494,10 @@
# CHECK: v_pk_max_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_max_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_max_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_max_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8c,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8c,0xd3,0x01,0x05,0x02,0x18]
@@ -1560,10 +1560,10 @@
# CHECK: v_pk_min_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xc1,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xc1,0x04,0x02,0x18
-# CHECK: v_pk_min_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_u16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xf0,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xf0,0x04,0x02,0x18
-# CHECK: v_pk_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18]
+# CHECK: v_pk_min_u16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xf7,0x04,0x02,0x18]
0x05,0x00,0x8d,0xd3,0xf7,0x04,0x02,0x18
# CHECK: v_pk_min_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x03,0x18]
@@ -1602,10 +1602,10 @@
# CHECK: v_pk_min_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x83,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0x83,0x01,0x18
-# CHECK: v_pk_min_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_u16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xe1,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0xe1,0x01,0x18
-# CHECK: v_pk_min_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18]
+# CHECK: v_pk_min_u16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xef,0x01,0x18]
0x05,0x00,0x8d,0xd3,0x01,0xef,0x01,0x18
# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8d,0xd3,0x01,0x05,0x02,0x18]