diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel')
12 files changed, 2345 insertions, 97 deletions
| diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll new file mode 100644 index 0000000..e117200 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll @@ -0,0 +1,612 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_add_i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_add_i32 s16, s16, s17 +; GFX7-NEXT:    v_mov_b32_e32 v0, s16 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_add_i32 s16, s16, s17 +; GFX9-NEXT:    v_mov_b32_e32 v0, s16 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_add_i32 s16, s16, s17 +; GFX8-NEXT:    v_mov_b32_e32 v0, s16 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX10-NEXT:    s_add_i32 s16, s16, s17 +; GFX10-NEXT:    v_mov_b32_e32 v0, s16 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_add_i32 s0, s0, s1 +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_add_co_i32 s0, s0, s1 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_mov_b32_e32 v0, s0 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i16 %a, %b +  ret i16 %c +} + +define i16 @v_add_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_add_i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_add_u16_e32 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_add_u16_e32 v0, v0, v1 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_add_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    
s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_add_nc_u16 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i16 %a, %b +  ret i16 %c +} + +define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_add_i32: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_add_i32 s16, s16, s17 +; GFX7-NEXT:    v_mov_b32_e32 v0, s16 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i32: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_add_i32 s16, s16, s17 +; GFX9-NEXT:    v_mov_b32_e32 v0, s16 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i32: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_add_i32 s16, s16, s17 +; GFX8-NEXT:    v_mov_b32_e32 v0, s16 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i32: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_add_i32 s16, s16, s17 +; GFX10-NEXT:    v_mov_b32_e32 v0, s16 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i32: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_add_i32 s0, s0, s1 +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i32: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_add_co_i32 s0, s0, s1 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_mov_b32_e32 v0, s0 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i32 %a, %b +  ret i32 %c +} + +define i32 @v_add_i32(i32 %a, i32 %b) { +; GFX7-LABEL: v_add_i32: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_add_i32_e32 v0, 
vcc, v0, v1 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i32: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i32: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i32: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i32: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i32: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_add_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i32 %a, %b +  ret i32 %c +} + +define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: s_add_v2i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_add_i32 s16, s16, s18 +; GFX7-NEXT:    s_add_i32 s17, s17, s19 +; GFX7-NEXT:    v_mov_b32_e32 v0, s16 +; GFX7-NEXT:    v_mov_b32_e32 v1, s17 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_v2i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_lshr_b32 s4, s16, 16 +; GFX9-NEXT:    s_lshr_b32 s5, s17, 16 +; GFX9-NEXT:    s_add_i32 s16, s16, s17 +; GFX9-NEXT:    s_add_i32 s4, s4, s5 +; GFX9-NEXT:    s_pack_ll_b32_b16 s4, s16, s4 +; GFX9-NEXT:    v_mov_b32_e32 v0, s4 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: 
s_add_v2i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_lshr_b32 s4, s16, 16 +; GFX8-NEXT:    s_lshr_b32 s5, s17, 16 +; GFX8-NEXT:    s_add_i32 s4, s4, s5 +; GFX8-NEXT:    s_add_i32 s16, s16, s17 +; GFX8-NEXT:    s_and_b32 s4, 0xffff, s4 +; GFX8-NEXT:    s_and_b32 s5, 0xffff, s16 +; GFX8-NEXT:    s_lshl_b32 s4, s4, 16 +; GFX8-NEXT:    s_or_b32 s4, s5, s4 +; GFX8-NEXT:    v_mov_b32_e32 v0, s4 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_v2i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_lshr_b32 s4, s16, 16 +; GFX10-NEXT:    s_lshr_b32 s5, s17, 16 +; GFX10-NEXT:    s_add_i32 s16, s16, s17 +; GFX10-NEXT:    s_add_i32 s4, s4, s5 +; GFX10-NEXT:    s_pack_ll_b32_b16 s4, s16, s4 +; GFX10-NEXT:    v_mov_b32_e32 v0, s4 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_v2i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_lshr_b32 s2, s0, 16 +; GFX11-NEXT:    s_lshr_b32 s3, s1, 16 +; GFX11-NEXT:    s_add_i32 s0, s0, s1 +; GFX11-NEXT:    s_add_i32 s2, s2, s3 +; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s2 +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_v2i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_lshr_b32 s2, s0, 16 +; GFX12-NEXT:    s_lshr_b32 s3, s1, 16 +; GFX12-NEXT:    s_add_co_i32 s0, s0, s1 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    s_add_co_i32 s2, s2, s3 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s0, s2 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_mov_b32_e32 v0, s0 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add <2 x i16> %a, %b +  ret <2 x i16> %c +} + +define <2 
x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_add_v2i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_v2i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_v2i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_add_u16_e32 v2, v0, v1 +; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_v2i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_pk_add_u16 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_v2i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_pk_add_u16 v0, v0, v1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_v2i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_pk_add_u16 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add <2 x i16> %a, %b +  ret <2 x i16> %c +} + +define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_add_i64: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_add_u32 s4, s16, s18 +; GFX7-NEXT:    s_addc_u32 s5, s17, s19 +; GFX7-NEXT:    v_mov_b32_e32 v0, s4 +; GFX7-NEXT:    v_mov_b32_e32 v1, s5 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i64: +; GFX9:       
; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_add_u32 s4, s16, s18 +; GFX9-NEXT:    s_addc_u32 s5, s17, s19 +; GFX9-NEXT:    v_mov_b32_e32 v0, s4 +; GFX9-NEXT:    v_mov_b32_e32 v1, s5 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i64: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_add_u32 s4, s16, s18 +; GFX8-NEXT:    s_addc_u32 s5, s17, s19 +; GFX8-NEXT:    v_mov_b32_e32 v0, s4 +; GFX8-NEXT:    v_mov_b32_e32 v1, s5 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i64: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_add_u32 s4, s16, s18 +; GFX10-NEXT:    s_addc_u32 s5, s17, s19 +; GFX10-NEXT:    v_mov_b32_e32 v0, s4 +; GFX10-NEXT:    v_mov_b32_e32 v1, s5 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i64: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_add_u32 s0, s0, s2 +; GFX11-NEXT:    s_addc_u32 s1, s1, s3 +; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i64: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i64 %a, %b +  ret i64 %c +} + +define i64 @v_add_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_add_i64: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i64: +; GFX9:       ; %bb.0: +; 
GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i64: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i64: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i64: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i64: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT:    s_wait_alu 0xfffd +; GFX12-NEXT:    v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = add i64 %a, %b +  ret i64 %c +} + +define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_uaddo_uadde: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_add_u32 s4, s16, s18 +; GFX7-NEXT:    s_addc_u32 s5, s17, s19 +; GFX7-NEXT:    v_mov_b32_e32 v4, s4 +; GFX7-NEXT:    s_mov_b32 s6, 0 +; GFX7-NEXT:    s_cselect_b32 s8, 1, 0 +; GFX7-NEXT:    v_mov_b32_e32 v5, s5 +; GFX7-NEXT:    s_mov_b32 s7, 0xf000 +; GFX7-NEXT:    s_mov_b64 s[4:5], 0 +; GFX7-NEXT:    
buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT:    v_mov_b32_e32 v0, s8 +; GFX7-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT:    s_waitcnt vmcnt(0) +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_uaddo_uadde: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_add_u32 s4, s16, s18 +; GFX9-NEXT:    s_addc_u32 s5, s17, s19 +; GFX9-NEXT:    v_mov_b32_e32 v4, s4 +; GFX9-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX9-NEXT:    v_mov_b32_e32 v5, s5 +; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT:    v_mov_b32_e32 v0, s6 +; GFX9-NEXT:    global_store_dword v[2:3], v0, off +; GFX9-NEXT:    s_waitcnt vmcnt(0) +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_uaddo_uadde: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_add_u32 s4, s16, s18 +; GFX8-NEXT:    s_addc_u32 s5, s17, s19 +; GFX8-NEXT:    v_mov_b32_e32 v4, s4 +; GFX8-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX8-NEXT:    v_mov_b32_e32 v5, s5 +; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT:    v_mov_b32_e32 v0, s6 +; GFX8-NEXT:    flat_store_dword v[2:3], v0 +; GFX8-NEXT:    s_waitcnt vmcnt(0) +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_uaddo_uadde: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_add_u32 s4, s16, s18 +; GFX10-NEXT:    s_addc_u32 s5, s17, s19 +; GFX10-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX10-NEXT:    v_mov_b32_e32 v4, s4 +; GFX10-NEXT:    v_mov_b32_e32 v5, s5 +; GFX10-NEXT:    v_mov_b32_e32 v6, s6 +; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT:    global_store_dword v[2:3], v6, off +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_uaddo_uadde: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_add_u32 s0, s0, s2 +; GFX11-NEXT:    s_addc_u32 s1, s1, s3 +; 
GFX11-NEXT:    s_cselect_b32 s2, 1, 0 +; GFX11-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT:    v_mov_b32_e32 v6, s2 +; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT:    global_store_b32 v[2:3], v6, off +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_uaddo_uadde: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_add_co_u32 s0, s0, s2 +; GFX12-NEXT:    s_add_co_ci_u32 s1, s1, s3 +; GFX12-NEXT:    s_cselect_b32 s2, 1, 0 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT:    v_mov_b32_e32 v6, s2 +; GFX12-NEXT:    global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT:    global_store_b32 v[2:3], v6, off +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) +  %add = extractvalue {i64, i1} %uaddo, 0 +  %of = extractvalue {i64, i1} %uaddo, 1 +  %of32 = select i1 %of, i32 1, i32 0 +  store i64 %add, ptr addrspace(1) %res +  store i32 %of32, ptr addrspace(1) %carry +  ret void +} + +define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_uaddo_uadde: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT:    s_mov_b32 s6, 0 +; GFX7-NEXT:    s_mov_b32 s7, 0xf000 +; GFX7-NEXT:    s_mov_b64 s[4:5], 0 +; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT:    buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT:    s_waitcnt vmcnt(0) +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_uaddo_uadde: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT:    global_store_dword v[6:7], v2, off +; GFX9-NEXT:    s_waitcnt vmcnt(0) +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_uaddo_uadde: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT:    flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT:    flat_store_dword v[6:7], v2 +; GFX8-NEXT:    s_waitcnt vmcnt(0) +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_uaddo_uadde: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT:    global_store_dword v[6:7], v2, off +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_uaddo_uadde: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT:    global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT:    global_store_b32 v[6:7], v2, off +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_uaddo_uadde: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT:    
s_wait_alu 0xfffd +; GFX12-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT:    s_wait_alu 0xfffd +; GFX12-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT:    global_store_b32 v[6:7], v2, off +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) +  %add = extractvalue {i64, i1} %uaddo, 0 +  %of = extractvalue {i64, i1} %uaddo, 1 +  %of32 = select i1 %of, i32 1, i32 0 +  store i64 %add, ptr addrspace(1) %res +  store i32 %of32, ptr addrspace(1) %carry +  ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll new file mode 100644 index 0000000..1a7ccf0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { +; GFX7-LABEL: fcmp_uniform_select: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x9 +; GFX7-NEXT:    s_load_dword s3, s[4:5], 0xb +; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xd +; GFX7-NEXT:    s_mov_b32 s2, -1 +; GFX7-NEXT:    s_waitcnt lgkmcnt(0) +; GFX7-NEXT:    v_cmp_eq_f32_e64 s[4:5], s6, 0 +; GFX7-NEXT:    s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7-NEXT:    s_cselect_b32 s4, 
1, 0 +; GFX7-NEXT:    s_and_b32 s4, s4, 1 +; GFX7-NEXT:    s_cmp_lg_u32 s4, 0 +; GFX7-NEXT:    s_cselect_b32 s3, s7, s3 +; GFX7-NEXT:    v_mov_b32_e32 v0, s3 +; GFX7-NEXT:    s_mov_b32 s3, 0xf000 +; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT:    s_endpgm +; +; GFX8-LABEL: fcmp_uniform_select: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT:    s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x34 +; GFX8-NEXT:    s_waitcnt lgkmcnt(0) +; GFX8-NEXT:    v_cmp_eq_f32_e64 s[4:5], s0, 0 +; GFX8-NEXT:    s_cmp_lg_u64 s[4:5], 0 +; GFX8-NEXT:    s_cselect_b32 s0, 1, 0 +; GFX8-NEXT:    s_and_b32 s0, s0, 1 +; GFX8-NEXT:    s_cmp_lg_u32 s0, 0 +; GFX8-NEXT:    s_cselect_b32 s0, s1, s6 +; GFX8-NEXT:    v_mov_b32_e32 v0, s2 +; GFX8-NEXT:    v_mov_b32_e32 v2, s0 +; GFX8-NEXT:    v_mov_b32_e32 v1, s3 +; GFX8-NEXT:    flat_store_dword v[0:1], v2 +; GFX8-NEXT:    s_endpgm +; +; GFX11-LABEL: fcmp_uniform_select: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_clause 0x2 +; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT:    s_load_b32 s6, s[4:5], 0x2c +; GFX11-NEXT:    s_load_b64 s[2:3], s[4:5], 0x34 +; GFX11-NEXT:    v_mov_b32_e32 v1, 0 +; GFX11-NEXT:    s_waitcnt lgkmcnt(0) +; GFX11-NEXT:    v_cmp_eq_f32_e64 s0, s0, 0 +; GFX11-NEXT:    s_cmp_lg_u32 s0, 0 +; GFX11-NEXT:    s_cselect_b32 s0, 1, 0 +; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT:    s_and_b32 s0, s0, 1 +; GFX11-NEXT:    s_cmp_lg_u32 s0, 0 +; GFX11-NEXT:    s_cselect_b32 s0, s1, s6 +; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    global_store_b32 v1, v0, s[2:3] +; GFX11-NEXT:    s_endpgm +  %cmp = fcmp oeq float %a, 0.0 +  %sel = select i1 %cmp, i32 %b, i32 %c +  store i32 %sel, ptr addrspace(1) %out +  ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir new file mode 100644 index 0000000..67cc016 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -0,0 +1,38 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s +# Selecting G_AMDGPU_COPY_SCC_VCC on a vcc-bank s1 must set SCC before the COPY from $scc: S_OR_B64 on gfx700, S_CMP_LG_U64 on gfx803, S_CMP_LG_U32 on gfx1100. + +--- +name: test_copy_scc_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | +  bb.0: +    ; GFX7-LABEL: name: test_copy_scc_vcc +    ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF +    ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc +    ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc +    ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] +    ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 +    ; +    ; GFX8-LABEL: name: test_copy_scc_vcc +    ; GFX8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF +    ; GFX8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc +    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc +    ; GFX8-NEXT: $sgpr0 = COPY [[COPY]] +    ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr0 +    ; +    ; GFX11-LABEL: name: test_copy_scc_vcc +    ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF +    ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc +    ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc +    ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] +    ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 +    %0:vcc(s1) = G_IMPLICIT_DEF +    %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0 +    $sgpr0 = COPY %1 +    S_ENDPGM 0, implicit $sgpr0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll index 7714c03..d3e2118 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll @@ -113,9 +113,9 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT:    s_cmp_eq_u32 s0, 0 +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_and_b32 s0, s0, 1 +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0  ; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2  ; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -161,16 +161,17 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0 +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_and_b32 s0, s0, 1  ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB10_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB10_3 -; CHECK-NEXT:  .LBB10_2: ; %true +; CHECK-NEXT:    s_cbranch_scc1 .LBB10_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    s_branch .LBB10_3 +; CHECK-NEXT:  .LBB10_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB10_3  ; CHECK-NEXT:  .LBB10_3:    %c = trunc i32 %v to i1    %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -208,11 +209,7 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:  ; CHECK:       ; %bb.0: -; 
CHECK-NEXT:    s_cmp_lt_u32 s0, 12 -; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT:    s_cmp_eq_u32 s0, 0 +; CHECK-NEXT:    s_cmp_ge_u32 s0, 12  ; CHECK-NEXT:    s_cbranch_scc1 .LBB12_2  ; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -258,17 +255,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:  ; CHECK:       ; %bb.0:  ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 -; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB14_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB14_3 -; CHECK-NEXT:  .LBB14_2: ; %true +; CHECK-NEXT:    s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    s_branch .LBB14_3 +; CHECK-NEXT:  .LBB14_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB14_3  ; CHECK-NEXT:  .LBB14_3:    %c = icmp ult i32 %v, 12    %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %c) @@ -310,14 +303,12 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 +; CHECK-NEXT:    s_cmp_ge_u32 s0, 12  ; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_cmp_gt_u32 s1, 34 +; CHECK-NEXT:    s_cmp_le_u32 s1, 34  ; CHECK-NEXT:    s_cselect_b32 s1, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, s0, s1 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0 -; CHECK-NEXT:    s_cmp_eq_u32 s0, 0 +; CHECK-NEXT:    s_or_b32 s0, s0, s1 +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0  ; CHECK-NEXT:    s_cbranch_scc1 .LBB16_2  ; CHECK-NEXT:  ; %bb.1: ; %true  
; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -372,16 +363,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg  ; CHECK-NEXT:    s_cmp_gt_u32 s1, 34  ; CHECK-NEXT:    s_cselect_b32 s1, 1, 0  ; CHECK-NEXT:    s_and_b32 s0, s0, s1 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, s0  ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB18_3 -; CHECK-NEXT:  .LBB18_2: ; %true +; CHECK-NEXT:    s_cbranch_scc1 .LBB18_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    s_branch .LBB18_3 +; CHECK-NEXT:  .LBB18_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB18_3  ; CHECK-NEXT:  .LBB18_3:    %v1c = icmp ult i32 %v1, 12    %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll index 7b81669..250fbc7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll @@ -116,9 +116,9 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_and_b32 s0, s0, 1 +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0  ; CHECK-NEXT:    s_cbranch_scc1 .LBB8_2  ; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -164,16 +164,17 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    
v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB10_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB10_3 -; CHECK-NEXT:  .LBB10_2: ; %true +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_xor_b32 s0, s0, 1 +; CHECK-NEXT:    s_and_b32 s0, s0, 1 +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 +; CHECK-NEXT:    s_cbranch_scc1 .LBB10_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    s_branch .LBB10_3 +; CHECK-NEXT:  .LBB10_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB10_3  ; CHECK-NEXT:  .LBB10_3:    %c = trunc i32 %v to i1    %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -211,11 +212,7 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 -; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT:    s_cmp_ge_u32 s0, 12  ; CHECK-NEXT:    s_cbranch_scc1 .LBB12_2  ; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -261,17 +258,13 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) {  ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare:  ; CHECK:       ; %bb.0:  ; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 -; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB14_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB14_3 -; CHECK-NEXT:  .LBB14_2: ; %true +; CHECK-NEXT:    s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    
s_branch .LBB14_3 +; CHECK-NEXT:  .LBB14_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB14_3  ; CHECK-NEXT:  .LBB14_3:    %c = icmp ult i32 %v, 12    %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -313,14 +306,12 @@ false:  define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) {  ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and:  ; CHECK:       ; %bb.0: -; CHECK-NEXT:    s_cmp_lt_u32 s0, 12 +; CHECK-NEXT:    s_cmp_ge_u32 s0, 12  ; CHECK-NEXT:    s_cselect_b32 s0, 1, 0 -; CHECK-NEXT:    s_cmp_gt_u32 s1, 34 +; CHECK-NEXT:    s_cmp_le_u32 s1, 34  ; CHECK-NEXT:    s_cselect_b32 s1, 1, 0 -; CHECK-NEXT:    s_and_b32 s0, s0, s1 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_eq_u64 s[0:1], 0 +; CHECK-NEXT:    s_or_b32 s0, s0, s1 +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0  ; CHECK-NEXT:    s_cbranch_scc1 .LBB16_2  ; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42 @@ -375,16 +366,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg  ; CHECK-NEXT:    s_cmp_gt_u32 s1, 34  ; CHECK-NEXT:    s_cselect_b32 s1, 1, 0  ; CHECK-NEXT:    s_and_b32 s0, s0, s1 -; CHECK-NEXT:    s_and_b32 s0, 1, s0 -; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, s0 -; CHECK-NEXT:    s_cmp_lg_u64 s[0:1], 0 -; CHECK-NEXT:    s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT:  ; %bb.1: ; %false -; CHECK-NEXT:    s_mov_b32 s0, 33 -; CHECK-NEXT:    s_branch .LBB18_3 -; CHECK-NEXT:  .LBB18_2: ; %true +; CHECK-NEXT:    s_cmp_lg_u32 s0, 0 +; CHECK-NEXT:    s_cbranch_scc1 .LBB18_2 +; CHECK-NEXT:  ; %bb.1: ; %true  ; CHECK-NEXT:    s_mov_b32 s0, 42  ; CHECK-NEXT:    s_branch .LBB18_3 +; CHECK-NEXT:  .LBB18_2: ; %false +; CHECK-NEXT:    s_mov_b32 s0, 33 +; CHECK-NEXT:    s_branch .LBB18_3  ; CHECK-NEXT:  .LBB18_3:    %v1c = icmp ult i32 %v1, 12    %v2c = icmp ugt i32 %v2, 34 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir new file mode 100644 index 0000000..097372a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir @@ -0,0 +1,524 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s +--- +name: add_s16_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: add_s16_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) +    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] +    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_ADD %2, %3 +    %5:_(s16) = G_AND %4, %4 +... 
+ +--- +name: add_s16_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: add_s16_sv +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr0 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_ADD %2, %3 +    %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: add_s16_vs +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr0 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_ADD %2, %3 +    %5:_(s16) = G_AND %4, %4 +... 
+ +--- +name: add_s16_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: add_s16_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_ADD %2, %3 +    %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s32_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: add_s32_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = G_ADD %0, %1 +    %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: add_s32_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: add_s32_sv +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr0 +    %2:_(s32) = G_ADD %0, %1 +    %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: add_s32_vs +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr0 +    %2:_(s32) = G_ADD %0, %1 +    %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: add_s32_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = G_ADD %0, %1 +    %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: add_s64_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 +    ; CHECK-LABEL: name: add_s64_ss +    ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]] +    %0:_(s64) = COPY $sgpr0_sgpr1 +    %1:_(s64) = COPY $sgpr2_sgpr3 +    %2:_(s64) = G_ADD %0, %1 +    %3:_(s64) = G_CONSTANT i64 255 +    %4:_(s64) = G_AND %2, %2 +... + +--- +name: add_s64_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-LABEL: name: add_s64_sv +    ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $sgpr0_sgpr1 +    %1:_(s64) = COPY $vgpr0_vgpr1 +    %2:_(s64) = G_ADD %0, %1 +    %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: add_s64_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-LABEL: name: add_s64_vs +    ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $vgpr0_vgpr1 +    %1:_(s64) = COPY $sgpr0_sgpr1 +    %2:_(s64) = G_ADD %0, %1 +    %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: add_s64_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 +    ; CHECK-LABEL: name: add_s64_vv +    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 +    ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $vgpr0_vgpr1 +    %1:_(s64) = COPY $vgpr2_vgpr3 +    %2:_(s64) = G_ADD %0, %1 +    %3:_(s64) = G_AND %2, %2 +... + +--- +name: uaddo_s32_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: uaddo_s32_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]] +    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32), %3:_(s1) = G_UADDO %0, %1 +    %4:_(s32) = G_ZEXT %3 +    %5:_(s32) = G_AND %4, %2 +... 
+ +--- +name: uaddo_s32_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr1 +    ; CHECK-LABEL: name: uaddo_s32_sv +    ; CHECK: liveins: $sgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) +    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32), %3:_(s1) = G_UADDO %0, %1 +    %4:_(s32) = G_ZEXT %3 +    %5:_(s32) = G_AND %2, %4 +... + +--- +name: uaddo_s32_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr1 +    ; CHECK-LABEL: name: uaddo_s32_vs +    ; CHECK: liveins: $vgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32), %3:_(s1) = G_UADDO %0, %1 +    %4:_(s32) = G_ZEXT %3 +    %5:_(s32) = G_AND %2, %4 +... 
+ +--- +name: uaddo_s32_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: uaddo_s32_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32), %3:_(s1) = G_UADDO %0, %1 +    %4:_(s32) = G_ZEXT %3 +    %5:_(s32) = G_AND %2, %4 +... + +--- +name: uadde_s32_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-LABEL: name: uadde_s32_ss +    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] +    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] +    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 +    %6:_(s32) = G_ZEXT 
%5 +    %7:_(s32) = G_AND %4, %6 +... + +--- +name: uadde_s32_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr1, $sgpr2 +    ; CHECK-LABEL: name: uadde_s32_sv +    ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) +    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) +    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 +    %6:_(s32) = G_ZEXT %5 +    %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr1, $sgpr2 +    ; CHECK-LABEL: name: uadde_s32_vs +    ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) +    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 +    %6:_(s32) = G_ZEXT %5 +    %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1, $vgpr2 +    ; CHECK-LABEL: name: uadde_s32_vv +    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] +    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]] +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = COPY $vgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 +    %6:_(s32) = G_ZEXT %5 +    %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_ss_scc_use +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-LABEL: name: uadde_s32_ss_scc_use +    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] +    ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] +    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] +    ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 +    %6:_(s32) = G_ZEXT %5 +    %8:_(s32) = G_AND %4, %6 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir index 54ee69f..30c958f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -1,6 +1,5 @@  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s  ---  name: add_s16_ss  legalized: true @@ -19,13 +18,13 @@ body: |      ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]]      ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $sgpr1      %2:_(s16) = G_TRUNC %0      %3:_(s16) = G_TRUNC %1      %4:_(s16) = G_ADD %2, %3 -    S_ENDPGM 0, implicit %4 +    %5:_(s16) = G_AND %4, %4  ...  
--- @@ -44,13 +43,13 @@ body: |      ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $vgpr0      %2:_(s16) = G_TRUNC %0      %3:_(s16) = G_TRUNC %1      %4:_(s16) = G_ADD %2, %3 -    S_ENDPGM 0, implicit %4 +    %5:_(s16) = G_AND %4, %4  ...  --- @@ -69,13 +68,13 @@ body: |      ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32)      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]      %0:_(s32) = COPY $vgpr0      %1:_(s32) = COPY $sgpr0      %2:_(s16) = G_TRUNC %0      %3:_(s16) = G_TRUNC %1      %4:_(s16) = G_ADD %2, %3 -    S_ENDPGM 0, implicit %4 +    %5:_(s16) = G_AND %4, %4  ...  --- @@ -93,11 +92,11 @@ body: |      ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)      ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]]      %0:_(s32) = COPY $vgpr0      %1:_(s32) = COPY $vgpr1      %2:_(s16) = G_TRUNC %0      %3:_(s16) = G_TRUNC %1      %4:_(s16) = G_ADD %2, %3 -    S_ENDPGM 0, implicit %4 +    %5:_(s16) = G_AND %4, %4  ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir index 97018fa..01eb391 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -1,6 +1,5 @@  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s  ---  name: add_v2s16_ss @@ -18,16 +17,19 @@ body: |      ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16      ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32)      ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) -    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 -    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) +    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]]      ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]]      ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32) -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) +    ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 +    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]]      %0:_(<2 x s16>) = COPY $sgpr0      %1:_(<2 x s16>) = COPY $sgpr1     
 %2:_(<2 x s16>) = G_ADD %0, %1 -    S_ENDPGM 0, implicit %2 +    %3:_(s16) = G_CONSTANT i16 255 +    %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3 +    %5:_(<2 x s16>) = G_AND %2, %4  ...  --- @@ -44,11 +46,11 @@ body: |      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]      %0:_(<2 x s16>) = COPY $sgpr0      %1:_(<2 x s16>) = COPY $vgpr0      %2:_(<2 x s16>) = G_ADD %0, %1 -    S_ENDPGM 0, implicit %2 +    %3:_(<2 x s16>) = G_AND %2, %2  ...  --- @@ -65,9 +67,11 @@ body: |      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>)      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]      %0:_(<2 x s16>) = COPY $vgpr0      %1:_(<2 x s16>) = COPY $sgpr0      %2:_(<2 x s16>) = G_ADD %0, %1 +    %3:_(<2 x s16>) = G_AND %2, %2  ...  --- @@ -83,9 +87,9 @@ body: |      ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1      ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]] -    ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]]      %0:_(<2 x s16>) = COPY $vgpr0      %1:_(<2 x s16>) = COPY $vgpr1      %2:_(<2 x s16>) = G_ADD %0, %1 -    S_ENDPGM 0, implicit %2 +    %3:_(<2 x s16>) = G_AND %2, %2  ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index 7378c93..e0e783e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -77,10 +77,14 @@ body: |      ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0      ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]      ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) +    ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $sgpr1      %2:_(s1) = G_ICMP intpred(eq), %0, %1      %3:_(s16) = G_SEXT %2 +    %4:_(s16) = G_CONSTANT i16 255 +    %5:_(s16) = G_AND %3, %4  ...  --- @@ -215,9 +219,13 @@ body: |      ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0      ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]]      ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) +    ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]]      %0:_(s32) = COPY $sgpr0      %1:_(s1) = G_TRUNC %0      %2:_(s16) = G_SEXT %1 +    %3:_(s16) = G_CONSTANT i16 255 +    %4:_(s16) = G_AND %2, %3  ...  
--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index b0199d3..e3c01c0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -1,5 +1,107 @@  # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: sub_s16_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: sub_s16_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) +    ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] +    ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_SUB %2, %3 +    %6:_(s16) = G_AND %4, %4 +... 
+ +--- +name: sub_s16_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: sub_s16_sv +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr0 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_SUB %2, %3 +    %6:_(s16) = G_AND %4, %4 +... + +--- +name: sub_s16_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: sub_s16_vs +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr0 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_SUB %2, %3 +    %6:_(s16) = G_AND %4, %4 +... 
+ +--- +name: sub_s16_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: sub_s16_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) +    ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s16) = G_TRUNC %0 +    %3:_(s16) = G_TRUNC %1 +    %4:_(s16) = G_SUB %2, %3 +    %6:_(s16) = G_AND %4, %4 +...  ---  name: sub_s32_ss @@ -14,9 +116,11 @@ body: |      ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1      ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $sgpr1      %2:_(s32) = G_SUB %0, %1 +    %4:_(s32) = G_AND %2, %2  ...  --- @@ -33,9 +137,11 @@ body: |      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)      ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $vgpr0      %2:_(s32) = G_SUB %0, %1 +    %4:_(s32) = G_AND %2, %2  ...  
--- @@ -52,9 +158,11 @@ body: |      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0      ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)      ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]      %0:_(s32) = COPY $vgpr0      %1:_(s32) = COPY $sgpr0      %2:_(s32) = G_SUB %0, %1 +    %4:_(s32) = G_AND %2, %2  ...  --- @@ -70,7 +178,376 @@ body: |      ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0      ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1      ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]]      %0:_(s32) = COPY $vgpr0      %1:_(s32) = COPY $vgpr1      %2:_(s32) = G_SUB %0, %1 +    %4:_(s32) = G_AND %2, %2 +... + +--- +name: sub_v2s16_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: sub_v2s16_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 +    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 +    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) +    ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) +    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]] +    ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]] +    ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]] +    %0:_(<2 x s16>) = COPY $sgpr0 +    %1:_(<2 x s16>) = 
COPY $sgpr1 +    %2:_(<2 x s16>) = G_SUB %0, %1 +    %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: sub_v2s16_sv +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] +    %0:_(<2 x s16>) = COPY $sgpr0 +    %1:_(<2 x s16>) = COPY $vgpr0 +    %2:_(<2 x s16>) = G_SUB %0, %1 +    %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr0 +    ; CHECK-LABEL: name: sub_v2s16_vs +    ; CHECK: liveins: $sgpr0, $vgpr0 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] +    %0:_(<2 x s16>) = COPY $vgpr0 +    %1:_(<2 x s16>) = COPY $sgpr0 +    %2:_(<2 x s16>) = G_SUB %0, %1 +    %5:_(<2 x s16>) = G_AND %2, %2 +... 
+ +--- +name: sub_v2s16_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: sub_v2s16_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] +    %0:_(<2 x s16>) = COPY $vgpr0 +    %1:_(<2 x s16>) = COPY $vgpr1 +    %2:_(<2 x s16>) = G_SUB %0, %1 +    %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_s64_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 +    ; CHECK-LABEL: name: sub_s64_ss +    ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]] +    %0:_(s64) = COPY $sgpr0_sgpr1 +    %1:_(s64) = COPY $sgpr0_sgpr1 +    %2:_(s64) = G_SUB %0, %1 +    %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: sub_s64_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-LABEL: name: sub_s64_sv +    ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $sgpr0_sgpr1 +    %1:_(s64) = COPY $vgpr0_vgpr1 +    %2:_(s64) = G_SUB %0, %1 +    %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: sub_s64_vs +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-LABEL: name: sub_s64_vs +    ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $vgpr0_vgpr1 +    %1:_(s64) = COPY $sgpr0_sgpr1 +    %2:_(s64) = G_SUB %0, %1 +    %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: sub_s64_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 +    ; CHECK-LABEL: name: sub_s64_vv +    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 +    ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] +    ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) +    %0:_(s64) = COPY $vgpr0_vgpr1 +    %1:_(s64) = COPY $vgpr2_vgpr3 +    %2:_(s64) = G_SUB %0, %1 +    %4:_(s64) = G_AND %2, %2 +... + +--- +name: usubo_s32_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1 +    ; CHECK-LABEL: name: usubo_s32_ss +    ; CHECK: liveins: $sgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32), %3:_(s1) = G_USUBO %0, %1 +    %5:_(s32) = G_AND %2, %2 +... 
+ +--- +name: usubo_s32_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr1 +    ; CHECK-LABEL: name: usubo_s32_sv +    ; CHECK: liveins: $sgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) +    ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32), %3:_(s1) = G_USUBO %0, %1 +    %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr1 +    ; CHECK-LABEL: name: usubo_s32_vs +    ; CHECK: liveins: $vgpr0, $sgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32), %3:_(s1) = G_USUBO %0, %1 +    %5:_(s32) = G_AND %2, %2 +... 
+ +--- +name: usubo_s32_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1 +    ; CHECK-LABEL: name: usubo_s32_vv +    ; CHECK: liveins: $vgpr0, $vgpr1 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32), %3:_(s1) = G_USUBO %0, %1 +    %5:_(s32) = G_AND %2, %2 +... + +--- +name: usube_s32_ss +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-LABEL: name: usube_s32_ss +    ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] +    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 +    %7:_(s32) = G_AND %4, %4 +... 
+ +--- +name: usube_s32_sv +legalized: true + +body: | +  bb.0: +    liveins: $sgpr0, $vgpr1, $sgpr2 +    ; CHECK-LABEL: name: usube_s32_sv +    ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) +    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) +    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] +    %0:_(s32) = COPY $sgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 +    %7:_(s32) = G_AND %4, %4 +... + +--- +name: usube_s32_vs +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $sgpr1, $sgpr2 +    ; CHECK-LABEL: name: usube_s32_vs +    ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 +    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) +    ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) +    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $sgpr1 +    %2:_(s32) = COPY $sgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 +    %7:_(s32) = G_AND %4, %4 +... 
+ +--- +name: usube_s32_vv +legalized: true + +body: | +  bb.0: +    liveins: $vgpr0, $vgpr1, $vgpr2 +    ; CHECK-LABEL: name: usube_s32_vv +    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 +    ; CHECK-NEXT: {{  $}} +    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 +    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 +    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 +    ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 +    ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] +    ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 +    ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] +    ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]] +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] +    %0:_(s32) = COPY $vgpr0 +    %1:_(s32) = COPY $vgpr1 +    %2:_(s32) = COPY $vgpr2 +    %3:_(s1) = G_TRUNC %2 +    %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 +    %7:_(s32) = G_AND %4, %4  ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index 088c20a3..d4baa5f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -73,10 +73,14 @@ body: |      ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0      ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]      ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) +    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]      %0:_(s32) = COPY $sgpr0      %1:_(s32) = COPY $sgpr1      %2:_(s1) = G_ICMP intpred(eq), %0, %1      %3:_(s16) = G_ZEXT %2 +    %4:_(s16) = G_CONSTANT i16 255 +    %5:_(s16) = G_AND %3, %4  ...  
--- @@ -209,9 +213,13 @@ body: |      ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0      ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]]      ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) +    ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 +    ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]]      %0:_(s32) = COPY $sgpr0      %1:_(s1) = G_TRUNC %0      %2:_(s16) = G_ZEXT %1 +    %3:_(s16) = G_CONSTANT i16 255 +    %4:_(s16) = G_AND %2, %3  ...  --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll new file mode 100644 index 0000000..8b5958d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll @@ -0,0 +1,535 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_sub_i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_sub_i32 s4, s16, s17 +; GFX7-NEXT:    v_mov_b32_e32 v0, s4 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i16: +; 
GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_sub_i32 s4, s16, s17 +; GFX9-NEXT:    v_mov_b32_e32 v0, s4 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_sub_i32 s4, s16, s17 +; GFX8-NEXT:    v_mov_b32_e32 v0, s4 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_sub_i32 s4, s16, s17 +; GFX10-NEXT:    v_mov_b32_e32 v0, s4 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_sub_i32 s0, s0, s1 +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_mov_b32_e32 v0, s0 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i16 %a, %b +  ret i16 %c +} + +define i16 @v_sub_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_sub_i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_sub_u16_e32 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_sub_u16_e32 v0, v0, v1 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i16: +; GFX10:       ; %bb.0: +; 
GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_sub_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_sub_nc_u16 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i16 %a, %b +  ret i16 %c +} + +define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_sub_i32: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_sub_i32 s4, s16, s17 +; GFX7-NEXT:    v_mov_b32_e32 v0, s4 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i32: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_sub_i32 s4, s16, s17 +; GFX9-NEXT:    v_mov_b32_e32 v0, s4 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i32: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_sub_i32 s4, s16, s17 +; GFX8-NEXT:    v_mov_b32_e32 v0, s4 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i32: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_sub_i32 s4, s16, s17 +; GFX10-NEXT:    v_mov_b32_e32 v0, s4 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i32: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_sub_i32 s0, s0, s1 +; GFX11-NEXT:    v_mov_b32_e32 v0, s0 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i32: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; 
GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_mov_b32_e32 v0, s0 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i32 %a, %b +  ret i32 %c +} + +define i32 @v_sub_i32(i32 %a, i32 %b) { +; GFX7-LABEL: v_sub_i32: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i32: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i32: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i32: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i32: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i32: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_sub_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i32 %a, %b +  ret i32 %c +} + +; TODO: Add test for s_sub_v2i16. Instruction selector currently fails +; to handle G_UNMERGE_VALUES. 
+ +define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_sub_v2i16: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_v2i16: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_v2i16: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1 +; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_v2i16: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_v2i16: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_pk_sub_i16 v0, v0, v1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_v2i16: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_pk_sub_i16 v0, v0, v1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub <2 x i16> %a, %b +  ret <2 x i16> %c +} + +define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_sub_i64: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_sub_u32 s4, s16, s18 +; GFX7-NEXT:    s_subb_u32 s5, s17, s19 +; GFX7-NEXT:    v_mov_b32_e32 v0, s4 +; GFX7-NEXT:    v_mov_b32_e32 v1, s5 +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i64: +; 
GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_sub_u32 s4, s16, s18 +; GFX9-NEXT:    s_subb_u32 s5, s17, s19 +; GFX9-NEXT:    v_mov_b32_e32 v0, s4 +; GFX9-NEXT:    v_mov_b32_e32 v1, s5 +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i64: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_sub_u32 s4, s16, s18 +; GFX8-NEXT:    s_subb_u32 s5, s17, s19 +; GFX8-NEXT:    v_mov_b32_e32 v0, s4 +; GFX8-NEXT:    v_mov_b32_e32 v1, s5 +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i64: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_sub_u32 s4, s16, s18 +; GFX10-NEXT:    s_subb_u32 s5, s17, s19 +; GFX10-NEXT:    v_mov_b32_e32 v0, s4 +; GFX10-NEXT:    v_mov_b32_e32 v1, s5 +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i64: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_sub_u32 s0, s0, s2 +; GFX11-NEXT:    s_subb_u32 s1, s1, s3 +; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i64: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_sub_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i64 %a, %b +  ret i64 %c +} + +define i64 @v_sub_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_sub_i64: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i64: +; GFX9:       ; 
%bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i64: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i64: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i64: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT:    v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i64: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT:    s_wait_alu 0xfffd +; GFX12-NEXT:    v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %c = sub i64 %a, %b +  ret i64 %c +} + +define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_usubo_usube: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    s_sub_u32 s4, s16, s18 +; GFX7-NEXT:    s_subb_u32 s5, s17, s19 +; GFX7-NEXT:    v_mov_b32_e32 v4, s4 +; GFX7-NEXT:    s_mov_b32 s6, 0 +; GFX7-NEXT:    s_cselect_b32 s8, 1, 0 +; GFX7-NEXT:    v_mov_b32_e32 v5, s5 +; GFX7-NEXT:    s_mov_b32 s7, 0xf000 +; GFX7-NEXT:    s_mov_b64 s[4:5], 0 +; GFX7-NEXT:   
 buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT:    v_mov_b32_e32 v0, s8 +; GFX7-NEXT:    buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT:    s_waitcnt vmcnt(0) +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_usubo_usube: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    s_sub_u32 s4, s16, s18 +; GFX9-NEXT:    s_subb_u32 s5, s17, s19 +; GFX9-NEXT:    v_mov_b32_e32 v4, s4 +; GFX9-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX9-NEXT:    v_mov_b32_e32 v5, s5 +; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT:    v_mov_b32_e32 v0, s6 +; GFX9-NEXT:    global_store_dword v[2:3], v0, off +; GFX9-NEXT:    s_waitcnt vmcnt(0) +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_usubo_usube: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    s_sub_u32 s4, s16, s18 +; GFX8-NEXT:    s_subb_u32 s5, s17, s19 +; GFX8-NEXT:    v_mov_b32_e32 v4, s4 +; GFX8-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX8-NEXT:    v_mov_b32_e32 v5, s5 +; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT:    v_mov_b32_e32 v0, s6 +; GFX8-NEXT:    flat_store_dword v[2:3], v0 +; GFX8-NEXT:    s_waitcnt vmcnt(0) +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_usubo_usube: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    s_sub_u32 s4, s16, s18 +; GFX10-NEXT:    s_subb_u32 s5, s17, s19 +; GFX10-NEXT:    s_cselect_b32 s6, 1, 0 +; GFX10-NEXT:    v_mov_b32_e32 v4, s4 +; GFX10-NEXT:    v_mov_b32_e32 v5, s5 +; GFX10-NEXT:    v_mov_b32_e32 v6, s6 +; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT:    global_store_dword v[2:3], v6, off +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_usubo_usube: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    s_sub_u32 s0, s0, s2 +; GFX11-NEXT:    s_subb_u32 s1, s1, s3 +; 
GFX11-NEXT:    s_cselect_b32 s2, 1, 0 +; GFX11-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT:    v_mov_b32_e32 v6, s2 +; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT:    global_store_b32 v[2:3], v6, off +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_usubo_usube: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    s_sub_co_u32 s0, s0, s2 +; GFX12-NEXT:    s_sub_co_ci_u32 s1, s1, s3 +; GFX12-NEXT:    s_cselect_b32 s2, 1, 0 +; GFX12-NEXT:    s_wait_alu 0xfffe +; GFX12-NEXT:    v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT:    v_mov_b32_e32 v6, s2 +; GFX12-NEXT:    global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT:    global_store_b32 v[2:3], v6, off +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) +  %sub = extractvalue {i64, i1} %usubo, 0 +  %of = extractvalue {i64, i1} %usubo, 1 +  %of32 = select i1 %of, i32 1, i32 0 +  store i64 %sub, ptr addrspace(1) %res +  store i32 %of32, ptr addrspace(1) %carry +  ret void +} + +define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_usubo_usube: +; GFX7:       ; %bb.0: +; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT:    s_mov_b32 s6, 0 +; GFX7-NEXT:    s_mov_b32 s7, 0xf000 +; GFX7-NEXT:    s_mov_b64 s[4:5], 0 +; GFX7-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT:    buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT:    s_waitcnt vmcnt(0) +; GFX7-NEXT:    s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_usubo_usube: +; GFX9:       ; %bb.0: +; GFX9-NEXT:    
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT:    global_store_dword v[6:7], v2, off +; GFX9-NEXT:    s_waitcnt vmcnt(0) +; GFX9-NEXT:    s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_usubo_usube: +; GFX8:       ; %bb.0: +; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT:    v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT:    flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT:    flat_store_dword v[6:7], v2 +; GFX8-NEXT:    s_waitcnt vmcnt(0) +; GFX8-NEXT:    s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_usubo_usube: +; GFX10:       ; %bb.0: +; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT:    global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT:    global_store_dword v[6:7], v2, off +; GFX10-NEXT:    s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_usubo_usube: +; GFX11:       ; %bb.0: +; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT:    global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT:    global_store_b32 v[6:7], v2, off +; GFX11-NEXT:    s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_usubo_usube: +; GFX12:       ; %bb.0: +; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT:    s_wait_expcnt 0x0 +; GFX12-NEXT:    s_wait_samplecnt 0x0 +; GFX12-NEXT:    s_wait_bvhcnt 0x0 +; GFX12-NEXT:    s_wait_kmcnt 0x0 +; GFX12-NEXT:    v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT:    
s_wait_alu 0xfffd +; GFX12-NEXT:    v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT:    s_wait_alu 0xfffd +; GFX12-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT:    global_store_b32 v[6:7], v2, off +; GFX12-NEXT:    s_setpc_b64 s[30:31] +  %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) +  %sub = extractvalue {i64, i1} %usubo, 0 +  %of = extractvalue {i64, i1} %usubo, 1 +  %of32 = select i1 %of, i32 1, i32 0 +  store i64 %sub, ptr addrspace(1) %res +  store i32 %of32, ptr addrspace(1) %carry +  ret void +} | 
