aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/GlobalISel
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel')
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll612
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll66
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir37
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll63
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll67
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir524
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir19
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir24
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir479
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll535
12 files changed, 2345 insertions, 97 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
new file mode 100644
index 0000000..e117200
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll
@@ -0,0 +1,612 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_add_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_add_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s16
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s16
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s16
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_add_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_add_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i32 %a, %b
+ ret i32 %c
+}
+
+define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
+; GFX7-LABEL: s_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_i32 s16, s16, s18
+; GFX7-NEXT: s_add_i32 s17, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s16
+; GFX7-NEXT: v_mov_b32_e32 v1, s17
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_lshr_b32 s4, s16, 16
+; GFX9-NEXT: s_lshr_b32 s5, s17, 16
+; GFX9-NEXT: s_add_i32 s16, s16, s17
+; GFX9-NEXT: s_add_i32 s4, s4, s5
+; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_lshr_b32 s4, s16, 16
+; GFX8-NEXT: s_lshr_b32 s5, s17, 16
+; GFX8-NEXT: s_add_i32 s4, s4, s5
+; GFX8-NEXT: s_add_i32 s16, s16, s17
+; GFX8-NEXT: s_and_b32 s4, 0xffff, s4
+; GFX8-NEXT: s_and_b32 s5, 0xffff, s16
+; GFX8-NEXT: s_lshl_b32 s4, s4, 16
+; GFX8-NEXT: s_or_b32 s4, s5, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s4, s16, 16
+; GFX10-NEXT: s_lshr_b32 s5, s17, 16
+; GFX10-NEXT: s_add_i32 s16, s16, s17
+; GFX10-NEXT: s_add_i32 s4, s4, s5
+; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s0, 16
+; GFX11-NEXT: s_lshr_b32 s3, s1, 16
+; GFX11-NEXT: s_add_i32 s0, s0, s1
+; GFX11-NEXT: s_add_i32 s2, s2, s3
+; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_lshr_b32 s2, s0, 16
+; GFX12-NEXT: s_lshr_b32 s3, s1, 16
+; GFX12-NEXT: s_add_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_add_co_i32 s2, s2, s3
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_add_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_add_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_add_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_add_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_add_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_add_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_add_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_add_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_add_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = add i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_add_u32 s4, s16, s18
+; GFX7-NEXT: s_addc_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_add_u32 s4, s16, s18
+; GFX9-NEXT: s_addc_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_add_u32 s4, s16, s18
+; GFX8-NEXT: s_addc_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s16, s18
+; GFX10-NEXT: s_addc_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s0, s0, s2
+; GFX11-NEXT: s_addc_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_add_co_u32 s0, s0, s2
+; GFX12-NEXT: s_add_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_uaddo_uadde:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_uaddo_uadde:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_uaddo_uadde:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_uaddo_uadde:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_uaddo_uadde:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_uaddo_uadde:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %add = extractvalue {i64, i1} %uaddo, 0
+ %of = extractvalue {i64, i1} %uaddo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %add, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
new file mode 100644
index 0000000..1a7ccf0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
+
+define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) {
+; GFX7-LABEL: fcmp_uniform_select:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9
+; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb
+; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
+; GFX7-NEXT: s_mov_b32 s2, -1
+; GFX7-NEXT: s_waitcnt lgkmcnt(0)
+; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0
+; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5]
+; GFX7-NEXT: s_cselect_b32 s4, 1, 0
+; GFX7-NEXT: s_and_b32 s4, s4, 1
+; GFX7-NEXT: s_cmp_lg_u32 s4, 0
+; GFX7-NEXT: s_cselect_b32 s3, s7, s3
+; GFX7-NEXT: v_mov_b32_e32 v0, s3
+; GFX7-NEXT: s_mov_b32 s3, 0xf000
+; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT: s_endpgm
+;
+; GFX8-LABEL: fcmp_uniform_select:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0
+; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0
+; GFX8-NEXT: s_cselect_b32 s0, 1, 0
+; GFX8-NEXT: s_and_b32 s0, s0, 1
+; GFX8-NEXT: s_cmp_lg_u32 s0, 0
+; GFX8-NEXT: s_cselect_b32 s0, s1, s6
+; GFX8-NEXT: v_mov_b32_e32 v0, s2
+; GFX8-NEXT: v_mov_b32_e32 v2, s0
+; GFX8-NEXT: v_mov_b32_e32 v1, s3
+; GFX8-NEXT: flat_store_dword v[0:1], v2
+; GFX8-NEXT: s_endpgm
+;
+; GFX11-LABEL: fcmp_uniform_select:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x2
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, 1, 0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-NEXT: s_and_b32 s0, s0, 1
+; GFX11-NEXT: s_cmp_lg_u32 s0, 0
+; GFX11-NEXT: s_cselect_b32 s0, s1, s6
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
+ %cmp = fcmp oeq float %a, 0.0
+ %sel = select i1 %cmp, i32 %b, i32 %c
+ store i32 %sel, ptr addrspace(1) %out
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
new file mode 100644
index 0000000..67cc016
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir
@@ -0,0 +1,37 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s
+
+---
+name: test_copy_scc_vcc
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GFX7-LABEL: name: test_copy_scc_vcc
+ ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc
+ ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX7-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GF8-LABEL: name: test_copy_scc_vcc
+ ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc
+ ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GF8-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0
+ ;
+ ; GFX11-LABEL: name: test_copy_scc_vcc
+ ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF
+ ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc
+ ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc
+ ; GFX11-NEXT: $sgpr0 = COPY [[COPY]]
+ ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0
+ %0:vcc(s1) = G_IMPLICIT_DEF
+ %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0
+ $sgpr0 = COPY %1
+ S_ENDPGM 0, implicit $sgpr0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
index 7714c03..d3e2118 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -113,9 +113,9 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -161,16 +161,17 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -208,11 +209,7 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -258,17 +255,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c)
@@ -310,14 +303,12 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_le_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
-; CHECK-NEXT: s_cmp_eq_u32 s0, 0
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -372,16 +363,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s0, 0, s0
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
index 7b81669..250fbc7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -116,9 +116,9 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB8_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -164,16 +164,17 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB10_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB10_3
-; CHECK-NEXT: .LBB10_2: ; %true
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_xor_b32 s0, s0, 1
+; CHECK-NEXT: s_and_b32 s0, s0, 1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB10_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB10_3
+; CHECK-NEXT: .LBB10_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB10_3
; CHECK-NEXT: .LBB10_3:
%c = trunc i32 %v to i1
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -211,11 +212,7 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cbranch_scc1 .LBB12_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {
; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_cmp_lt_u32 s0, 12
-; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB14_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB14_3
-; CHECK-NEXT: .LBB14_2: ; %true
+; CHECK-NEXT: s_cbranch_scc1 .LBB14_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB14_3
+; CHECK-NEXT: .LBB14_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB14_3
; CHECK-NEXT: .LBB14_3:
%c = icmp ult i32 %v, 12
%ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c)
@@ -313,14 +306,12 @@ false:
define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {
; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:
; CHECK: ; %bb.0:
-; CHECK-NEXT: s_cmp_lt_u32 s0, 12
+; CHECK-NEXT: s_cmp_ge_u32 s0, 12
; CHECK-NEXT: s_cselect_b32 s0, 1, 0
-; CHECK-NEXT: s_cmp_gt_u32 s1, 34
+; CHECK-NEXT: s_cmp_le_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
-; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_eq_u64 s[0:1], 0
+; CHECK-NEXT: s_or_b32 s0, s0, s1
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
; CHECK-NEXT: s_cbranch_scc1 .LBB16_2
; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
@@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg
; CHECK-NEXT: s_cmp_gt_u32 s1, 34
; CHECK-NEXT: s_cselect_b32 s1, 1, 0
; CHECK-NEXT: s_and_b32 s0, s0, s1
-; CHECK-NEXT: s_and_b32 s0, 1, s0
-; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0
-; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB18_2
-; CHECK-NEXT: ; %bb.1: ; %false
-; CHECK-NEXT: s_mov_b32 s0, 33
-; CHECK-NEXT: s_branch .LBB18_3
-; CHECK-NEXT: .LBB18_2: ; %true
+; CHECK-NEXT: s_cmp_lg_u32 s0, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB18_2
+; CHECK-NEXT: ; %bb.1: ; %true
; CHECK-NEXT: s_mov_b32 s0, 42
; CHECK-NEXT: s_branch .LBB18_3
+; CHECK-NEXT: .LBB18_2: ; %false
+; CHECK-NEXT: s_mov_b32 s0, 33
+; CHECK-NEXT: s_branch .LBB18_3
; CHECK-NEXT: .LBB18_3:
%v1c = icmp ult i32 %v1, 12
%v2c = icmp ugt i32 %v2, 34
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
new file mode 100644
index 0000000..097372a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir
@@ -0,0 +1,524 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
+---
+name: add_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_ADD %2, %3
+ %5:_(s16) = G_AND %4, %4
+...
+
+---
+name: add_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: add_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: add_s32_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: add_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = G_ADD %0, %1
+ %3:_(s32) = G_AND %2, %2
+...
+
+---
+name: add_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-LABEL: name: add_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr2_sgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_CONSTANT i64 255
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: add_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: add_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: add_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_ADD %0, %1
+ %3:_(s64) = G_AND %2, %2
+...
+
+---
+name: uaddo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %4, %2
+...
+
+---
+name: uaddo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uaddo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: uaddo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_UADDO %0, %1
+ %4:_(s32) = G_ZEXT %3
+ %5:_(s32) = G_AND %2, %4
+...
+
+---
+name: uadde_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: uadde_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %7:_(s32) = G_AND %4, %6
+...
+
+---
+name: uadde_s32_ss_scc_use
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: uadde_s32_ss_scc_use
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]]
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3
+ %6:_(s32) = G_ZEXT %5
+ %8:_(s32) = G_AND %4, %6
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
index 54ee69f..30c958f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_s16_ss
legalized: true
@@ -19,13 +18,13 @@ body: |
; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]
; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -44,13 +43,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -69,13 +68,13 @@ body: |
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
---
@@ -93,11 +92,11 @@ body: |
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s16) = G_TRUNC %0
%3:_(s16) = G_TRUNC %1
%4:_(s16) = G_ADD %2, %3
- S_ENDPGM 0, implicit %4
+ %5:_(s16) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
index 97018fa..01eb391 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s
---
name: add_v2s16_ss
@@ -18,16 +17,19 @@ body: |
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]
; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]]
; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32)
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $sgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3
+ %5:_(<2 x s16>) = G_AND %2, %4
...
---
@@ -44,11 +46,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $sgpr0
%1:_(<2 x s16>) = COPY $vgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -65,9 +67,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $sgpr0
%2:_(<2 x s16>) = G_ADD %0, %1
+ %3:_(<2 x s16>) = G_AND %2, %2
...
---
@@ -83,9 +87,9 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]]
- ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]
%0:_(<2 x s16>) = COPY $vgpr0
%1:_(<2 x s16>) = COPY $vgpr1
%2:_(<2 x s16>) = G_ADD %0, %1
- S_ENDPGM 0, implicit %2
+ %3:_(<2 x s16>) = G_AND %2, %2
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 7378c93..e0e783e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -77,10 +77,14 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_SEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -215,9 +219,13 @@ body: |
; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_SEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
index b0199d3..e3c01c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir
@@ -1,5 +1,107 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+
+---
+name: sub_s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr0
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
+
+---
+name: sub_s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s16) = G_TRUNC %0
+ %3:_(s16) = G_TRUNC %1
+ %4:_(s16) = G_SUB %2, %3
+ %6:_(s16) = G_AND %4, %4
+...
---
name: sub_s32_ss
@@ -14,9 +116,11 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -33,9 +137,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -52,9 +158,11 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
...
---
@@ -70,7 +178,376 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = G_SUB %0, %1
+ %4:_(s32) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: sub_v2s16_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]]
+ ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]]
+ ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $sgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_sv
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $sgpr0
+ %1:_(<2 x s16>) = COPY $vgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr0
+ ; CHECK-LABEL: name: sub_v2s16_vs
+ ; CHECK: liveins: $sgpr0, $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $sgpr0
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_v2s16_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: sub_v2s16_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]]
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s16>) = COPY $vgpr1
+ %2:_(<2 x s16>) = G_SUB %0, %1
+ %5:_(<2 x s16>) = G_AND %2, %2
+...
+
+---
+name: sub_s64_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-LABEL: name: sub_s64_ss
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]]
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_sv
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $sgpr0_sgpr1
+ %1:_(s64) = COPY $vgpr0_vgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-LABEL: name: sub_s64_vs
+ ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64)
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $sgpr0_sgpr1
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: sub_s64_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-LABEL: name: sub_s64_vv
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3
+ ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]]
+ ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32)
+ %0:_(s64) = COPY $vgpr0_vgpr1
+ %1:_(s64) = COPY $vgpr2_vgpr3
+ %2:_(s64) = G_SUB %0, %1
+ %4:_(s64) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1
+ ; CHECK-LABEL: name: usubo_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usubo_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; CHECK-LABEL: name: usubo_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32), %3:_(s1) = G_USUBO %0, %1
+ %5:_(s32) = G_AND %2, %2
+...
+
+---
+name: usube_s32_ss
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_ss
+ ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_sv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_sv
+ ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vs
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-LABEL: name: usube_s32_vs
+ ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
+ ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32)
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]]
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $sgpr1
+ %2:_(s32) = COPY $sgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
+...
+
+---
+name: usube_s32_vv
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-LABEL: name: usube_s32_vv
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
+ ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]]
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]]
+ ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]]
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %3:_(s1) = G_TRUNC %2
+ %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3
+ %7:_(s32) = G_AND %4, %4
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index 088c20a3..d4baa5f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -73,10 +73,14 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
%3:_(s16) = G_ZEXT %2
+ %4:_(s16) = G_CONSTANT i16 255
+ %5:_(s16) = G_AND %3, %4
...
---
@@ -209,9 +213,13 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]
%0:_(s32) = COPY $sgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s16) = G_ZEXT %1
+ %3:_(s16) = G_CONSTANT i16 255
+ %4:_(s16) = G_AND %2, %3
...
---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
new file mode 100644
index 0000000..8b5958d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll
@@ -0,0 +1,535 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s
+
+define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) {
+; GFX7-LABEL: s_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i16 @v_sub_i16(i16 %a, i16 %b) {
+; GFX7-LABEL: v_sub_i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u16 v0.l, v0.l, v1.l
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i16 %a, %b
+ ret i16 %c
+}
+
+define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) {
+; GFX7-LABEL: s_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_i32 s4, s16, s17
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_i32 s4, s16, s17
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_i32 s4, s16, s17
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_i32 s4, s16, s17
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_i32 s0, s0, s1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_i32 s0, s0, s1
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+define i32 @v_sub_i32(i32 %a, i32 %b) {
+; GFX7-LABEL: v_sub_i32:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i32:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i32:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_nc_u32_e32 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i32 %a, %b
+ ret i32 %c
+}
+
+; TODO: Add test for s_sub_v2i16. Instruction selector currently fails
+; to handle G_UNMERGE_VALUES.
+
+define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; GFX7-LABEL: v_sub_v2i16:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_v2i16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_v2i16:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u16_e32 v2, v0, v1
+; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_v2i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_v2i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_v2i16:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_pk_sub_i16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub <2 x i16> %a, %b
+ ret <2 x i16> %c
+}
+
+define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) {
+; GFX7-LABEL: s_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v0, s4
+; GFX7-NEXT: v_mov_b32_e32 v1, s5
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: v_mov_b32_e32 v1, s5
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: v_mov_b32_e32 v0, s4
+; GFX10-NEXT: v_mov_b32_e32 v1, s5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[2:3]
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define i64 @v_sub_i64(i64 %a, i64 %b) {
+; GFX7-LABEL: v_sub_i64:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_sub_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_sub_i64:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_sub_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_sub_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_sub_i64:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %c = sub i64 %a, %b
+ ret i64 %c
+}
+
+define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: s_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: s_sub_u32 s4, s16, s18
+; GFX7-NEXT: s_subb_u32 s5, s17, s19
+; GFX7-NEXT: v_mov_b32_e32 v4, s4
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_cselect_b32 s8, 1, 0
+; GFX7-NEXT: v_mov_b32_e32 v5, s5
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64
+; GFX7-NEXT: v_mov_b32_e32 v0, s8
+; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: s_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s4, s16, s18
+; GFX9-NEXT: s_subb_u32 s5, s17, s19
+; GFX9-NEXT: v_mov_b32_e32 v4, s4
+; GFX9-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-NEXT: v_mov_b32_e32 v5, s5
+; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX9-NEXT: v_mov_b32_e32 v0, s6
+; GFX9-NEXT: global_store_dword v[2:3], v0, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: s_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: s_sub_u32 s4, s16, s18
+; GFX8-NEXT: s_subb_u32 s5, s17, s19
+; GFX8-NEXT: v_mov_b32_e32 v4, s4
+; GFX8-NEXT: s_cselect_b32 s6, 1, 0
+; GFX8-NEXT: v_mov_b32_e32 v5, s5
+; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; GFX8-NEXT: v_mov_b32_e32 v0, s6
+; GFX8-NEXT: flat_store_dword v[2:3], v0
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: s_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s16, s18
+; GFX10-NEXT: s_subb_u32 s5, s17, s19
+; GFX10-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-NEXT: v_mov_b32_e32 v4, s4
+; GFX10-NEXT: v_mov_b32_e32 v5, s5
+; GFX10-NEXT: v_mov_b32_e32 v6, s6
+; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off
+; GFX10-NEXT: global_store_dword v[2:3], v6, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: s_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s0, s0, s2
+; GFX11-NEXT: s_subb_u32 s1, s1, s3
+; GFX11-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX11-NEXT: v_mov_b32_e32 v6, s2
+; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX11-NEXT: global_store_b32 v[2:3], v6, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: s_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_sub_co_u32 s0, s0, s2
+; GFX12-NEXT: s_sub_co_ci_u32 s1, s1, s3
+; GFX12-NEXT: s_cselect_b32 s2, 1, 0
+; GFX12-NEXT: s_wait_alu 0xfffe
+; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0
+; GFX12-NEXT: v_mov_b32_e32 v6, s2
+; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off
+; GFX12-NEXT: global_store_b32 v[2:3], v6, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}
+
+define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) {
+; GFX7-LABEL: v_usubo_usube:
+; GFX7: ; %bb.0:
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
+; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX7-NEXT: s_mov_b32 s6, 0
+; GFX7-NEXT: s_mov_b32 s7, 0xf000
+; GFX7-NEXT: s_mov_b64 s[4:5], 0
+; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64
+; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64
+; GFX7-NEXT: s_waitcnt vmcnt(0)
+; GFX7-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: v_usubo_usube:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX9-NEXT: global_store_dword v[6:7], v2, off
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_usubo_usube:
+; GFX8: ; %bb.0:
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
+; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
+; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1]
+; GFX8-NEXT: flat_store_dword v[6:7], v2
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_usubo_usube:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off
+; GFX10-NEXT: global_store_dword v[6:7], v2, off
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_usubo_usube:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: global_store_b32 v[6:7], v2, off
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: v_usubo_usube:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX12-NEXT: s_wait_alu 0xfffd
+; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: global_store_b32 v[6:7], v2, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
+ %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %sub = extractvalue {i64, i1} %usubo, 0
+ %of = extractvalue {i64, i1} %usubo, 1
+ %of32 = select i1 %of, i32 1, i32 0
+ store i64 %sub, ptr addrspace(1) %res
+ store i32 %of32, ptr addrspace(1) %carry
+ ret void
+}