diff options
author | Changpeng Fang <changpeng.fang@amd.com> | 2025-08-06 11:47:37 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-06 11:47:37 -0700 |
commit | 32161e9de36a747dde22a06c1c99a6091eb2920b (patch) | |
tree | e880069ffc6685622c5bcdab8da7c20d6527c459 /llvm/test | |
parent | 35bd40d321ccb2e646c112418ef32318dd0e040b (diff) | |
download | llvm-main.zip llvm-main.tar.gz llvm-main.tar.bz2 |
Do not fold an immediate into an instruction that already has a frame
index operand: the frame index may itself be lowered to another immediate
during frame index elimination, which would leave the instruction with two
literal operands.
Fixes: SWDEV-536263
---------
Co-authored-by: Matt Arsenault <arsenm2@gmail.com>
Diffstat (limited to 'llvm/test')
8 files changed, 215 insertions, 46 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index a066b15..e6a8bac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -1917,8 +1917,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_movk_i32 s0, 0x3e84 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -1933,7 +1934,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_movk_i32 s0, 0x3e84 +; GFX10-NEXT: s_movk_i32 s0, 0x3e80 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -1945,10 +1947,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX942-LABEL: store_load_large_imm_offset_kernel: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: s_movk_i32 s0, 0x3e80 ; GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: v_mov_b32_e32 v0, 15 -; GFX942-NEXT: s_movk_i32 s0, 0x3e84 +; GFX942-NEXT: s_add_i32 s0, s0, 4 ; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 @@ -1958,7 +1961,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX11-LABEL: store_load_large_imm_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: 
v_dual_mov_b32 v1, 15 -; GFX11-NEXT: s_movk_i32 s0, 0x3e84 +; GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s0, s0, 4 ; GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc @@ -1986,8 +1991,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; UNALIGNED_GFX9-NEXT: s_mov_b32 s0, 0 ; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) +; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e84 +; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s0, 4 ; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -2002,7 +2008,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; UNALIGNED_GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13 ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e84 +; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80 +; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s0, 4 ; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -2014,10 +2021,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_kernel: ; UNALIGNED_GFX942: ; %bb.0: ; %bb ; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80 ; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, off offset:4 sc0 sc1 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e84 +; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s0, 4 ; 
UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 @@ -2027,7 +2035,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; UNALIGNED_GFX11-LABEL: store_load_large_imm_offset_kernel: ; UNALIGNED_GFX11: ; %bb.0: ; %bb ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 -; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e84 +; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s0, 4 ; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v0, off offset:4 dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc @@ -2061,11 +2071,13 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_i32 s1, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_add_i32 s0, s32, 0x3e84 +; GFX9-NEXT: s_add_i32 s0, s1, 4 ; GFX9-NEXT: scratch_store_dword off, v0, s0 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ -2076,8 +2088,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-NEXT: s_movk_i32 s0, 0x3e80 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_add_i32 s0, s32, 0x3e84 +; GFX10-NEXT: s_add_i32 s1, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -2089,11 +2103,13 @@ define void 
@store_load_large_imm_offset_foo() { ; GFX942-LABEL: store_load_large_imm_offset_foo: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT: s_movk_i32 s0, 0x3e80 ; GFX942-NEXT: v_mov_b32_e32 v0, 13 +; GFX942-NEXT: s_add_i32 s1, s32, s0 ; GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: v_mov_b32_e32 v0, 15 -; GFX942-NEXT: s_add_i32 s0, s32, 0x3e84 +; GFX942-NEXT: s_add_i32 s0, s1, 4 ; GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) ; GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 @@ -2104,7 +2120,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 -; GFX11-NEXT: s_add_i32 s0, s32, 0x3e84 +; GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s1, s32, s0 +; GFX11-NEXT: s_add_i32 s0, s1, 4 ; GFX11-NEXT: scratch_store_b32 off, v0, s32 offset:4 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc @@ -2133,11 +2152,13 @@ define void @store_load_large_imm_offset_foo() { ; UNALIGNED_GFX9-LABEL: store_load_large_imm_offset_foo: ; UNALIGNED_GFX9: ; %bb.0: ; %bb ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX9-NEXT: s_add_i32 s1, s32, s0 ; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s32, 0x3e84 +; UNALIGNED_GFX9-NEXT: s_add_i32 s0, s1, 4 ; UNALIGNED_GFX9-NEXT: scratch_store_dword off, v0, s0 ; UNALIGNED_GFX9-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX9-NEXT: scratch_load_dword v0, off, s0 glc @@ 
-2148,8 +2169,10 @@ define void @store_load_large_imm_offset_foo() { ; UNALIGNED_GFX10: ; %bb.0: ; %bb ; UNALIGNED_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX10-NEXT: s_movk_i32 s0, 0x3e80 ; UNALIGNED_GFX10-NEXT: v_mov_b32_e32 v1, 15 -; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s32, 0x3e84 +; UNALIGNED_GFX10-NEXT: s_add_i32 s1, s32, s0 +; UNALIGNED_GFX10-NEXT: s_add_i32 s0, s1, 4 ; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; UNALIGNED_GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX10-NEXT: scratch_store_dword off, v1, s0 @@ -2161,11 +2184,13 @@ define void @store_load_large_imm_offset_foo() { ; UNALIGNED_GFX942-LABEL: store_load_large_imm_offset_foo: ; UNALIGNED_GFX942: ; %bb.0: ; %bb ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; UNALIGNED_GFX942-NEXT: s_movk_i32 s0, 0x3e80 ; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 13 +; UNALIGNED_GFX942-NEXT: s_add_i32 s1, s32, s0 ; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s32 offset:4 sc0 sc1 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: v_mov_b32_e32 v0, 15 -; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s32, 0x3e84 +; UNALIGNED_GFX942-NEXT: s_add_i32 s0, s1, 4 ; UNALIGNED_GFX942-NEXT: scratch_store_dword off, v0, s0 sc0 sc1 ; UNALIGNED_GFX942-NEXT: s_waitcnt vmcnt(0) ; UNALIGNED_GFX942-NEXT: scratch_load_dword v0, off, s0 sc0 sc1 @@ -2176,7 +2201,10 @@ define void @store_load_large_imm_offset_foo() { ; UNALIGNED_GFX11: ; %bb.0: ; %bb ; UNALIGNED_GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; UNALIGNED_GFX11-NEXT: v_dual_mov_b32 v0, 13 :: v_dual_mov_b32 v1, 15 -; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s32, 0x3e84 +; UNALIGNED_GFX11-NEXT: s_movk_i32 s0, 0x3e80 +; UNALIGNED_GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; UNALIGNED_GFX11-NEXT: s_add_i32 s1, s32, s0 +; UNALIGNED_GFX11-NEXT: s_add_i32 s0, s1, 4 ; UNALIGNED_GFX11-NEXT: 
scratch_store_b32 off, v0, s32 offset:4 dlc ; UNALIGNED_GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; UNALIGNED_GFX11-NEXT: scratch_store_b32 off, v1, s0 dlc diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index b25d9b2..fc88839 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_movk_i32 s0, 0x3004 +; GFX9-NEXT: s_movk_i32 s0, 0x3000 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_movk_i32 s0, 0x3804 +; GFX10-NEXT: s_movk_i32 s0, 0x3800 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3004 +; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1010-PAL-NEXT: v_mov_b32_e32 
v1, 15 +; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3804 +; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3804 +; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800 +; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_i32 s1, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s32, 0x3004 +; GFX9-NEXT: s_add_i32 s0, s1, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-NEXT: s_movk_i32 s0, 0x3800 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_add_i32 s0, s32, 0x3804 +; GFX10-NEXT: s_add_i32 s1, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3843,10 
+3852,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x3004 +; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x3804 +; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir index 7fad2f4..a88b1ec 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir @@ -75,7 +75,8 @@ stack: body: | bb.0: ; CHECK-LABEL: name: fold_frame_index__s_add_i32__fi_materializedconst_0 - ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 256, implicit-def $scc + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 256 + ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]], implicit-def $scc ; CHECK-NEXT: $sgpr4 = COPY [[S_ADD_I32_]] ; CHECK-NEXT: SI_RETURN implicit $sgpr4 %0:sreg_32 = S_MOV_B32 %stack.0 diff --git 
a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir index cc43142..2f2d727 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir @@ -46,7 +46,8 @@ body: | %2:sreg_32 = S_LSHL2_ADD_U32 %0, %1, implicit-def $scc ... # GCN-LABEL: name: test_frameindex{{$}} -# GCN: %1:sreg_32 = S_ADD_I32 %stack.0, 70 +# GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 70 +# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %stack.0, [[S_MOV_B32_]] --- name: test_frameindex tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll index 15cda62..f2fe61f 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-index-elimination.ll @@ -360,7 +360,8 @@ entry: ; s_add_i32. ; GCN-LABEL: {{^}}fi_sop2_s_add_u32_literal_error: -; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0, 0x2010 +; GCN: s_movk_i32 [[S_MOVK_I32_:s[0-9]+]], 0x1000 +; GCN: s_add_u32 [[ADD_LO:s[0-9]+]], 0x1010, [[S_MOVK_I32_]] ; GCN: s_addc_u32 [[ADD_HI:s[0-9]+]], s{{[0-9]+}}, 0 define amdgpu_kernel void @fi_sop2_s_add_u32_literal_error() #0 { entry: diff --git a/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll b/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll index 1c298014..3001248 100644 --- a/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll +++ b/llvm/test/CodeGen/AMDGPU/issue130120-eliminate-frame-index.ll @@ -6,16 +6,24 @@ define amdgpu_gfx [13 x i32] @issue130120() { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_mov_b32_e32 v0, 0 -; CHECK-NEXT: s_add_i32 s0, s32, 0xf0 -; CHECK-NEXT: s_add_i32 s1, s32, 0xf4 -; CHECK-NEXT: s_add_i32 s2, s32, 0xf8 -; CHECK-NEXT: s_add_i32 s3, s32, 0xfc +; CHECK-NEXT: s_movk_i32 s1, 0xf4 +; CHECK-NEXT: s_movk_i32 s2, 0xf8 +; CHECK-NEXT: s_movk_i32 s3, 0xfc +; CHECK-NEXT: s_movk_i32 s34, 0x100 
; CHECK-NEXT: v_mov_b32_e32 v1, v0 -; CHECK-NEXT: s_add_i32 s34, s32, 0x100 -; CHECK-NEXT: s_add_i32 s35, s32, 0x104 -; CHECK-NEXT: s_add_i32 s36, s32, 0x108 -; CHECK-NEXT: s_add_i32 s37, s32, 0x110 -; CHECK-NEXT: s_add_i32 s38, s32, 0x120 +; CHECK-NEXT: s_movk_i32 s35, 0x104 +; CHECK-NEXT: s_movk_i32 s36, 0x108 +; CHECK-NEXT: s_movk_i32 s37, 0x110 +; CHECK-NEXT: s_movk_i32 s38, 0x120 +; CHECK-NEXT: s_add_i32 s0, s32, 0xf0 +; CHECK-NEXT: s_add_i32 s1, s32, s1 +; CHECK-NEXT: s_add_i32 s2, s32, s2 +; CHECK-NEXT: s_add_i32 s3, s32, s3 +; CHECK-NEXT: s_add_i32 s34, s32, s34 +; CHECK-NEXT: s_add_i32 s35, s32, s35 +; CHECK-NEXT: s_add_i32 s36, s32, s36 +; CHECK-NEXT: s_add_i32 s37, s32, s37 +; CHECK-NEXT: s_add_i32 s38, s32, s38 ; CHECK-NEXT: s_or_b32 s39, s32, 4 ; CHECK-NEXT: s_or_b32 s40, s32, 8 ; CHECK-NEXT: s_or_b32 s41, s32, 12 diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll index a3ebaec..5f0ca7b 100644 --- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll +++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll @@ -74,7 +74,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_cbranch_scc1 .LBB0_1 ; FLATSCR-NEXT: ; %bb.2: ; %split -; FLATSCR-NEXT: s_movk_i32 s0, 0x5000 +; FLATSCR-NEXT: s_movk_i32 s0, 0x2000 +; FLATSCR-NEXT: s_addk_i32 s0, 0x3000 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s0 offset:208 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_movk_i32 s0, 0x3000 @@ -175,7 +176,9 @@ define void @func_local_stack_offset_uses_sp(ptr addrspace(1) %out) { ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_cbranch_scc1 .LBB1_1 ; FLATSCR-NEXT: ; %bb.2: ; %split -; FLATSCR-NEXT: s_add_i32 s0, s33, 0x5000 +; FLATSCR-NEXT: s_movk_i32 s0, 0x2000 +; FLATSCR-NEXT: s_add_i32 s1, s33, s0 +; FLATSCR-NEXT: s_add_i32 s0, s1, 0x3000 ; FLATSCR-NEXT: 
scratch_load_dwordx2 v[2:3], off, s0 offset:208 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_add_i32 s0, s33, 0x3000 @@ -223,30 +226,35 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr addrspace(1) %out ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: s_cbranch_scc1 .LBB2_1 ; MUBUF-NEXT: ; %bb.2: ; %split +; MUBUF-NEXT: s_movk_i32 s5, 0x12d4 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000 -; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d4, v1 +; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1 +; MUBUF-NEXT: s_movk_i32 s5, 0x12d0 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000 ; MUBUF-NEXT: s_movk_i32 s4, 0x4000 ; MUBUF-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_or_b32_e32 v0, 0x12d0, v1 +; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1 +; MUBUF-NEXT: s_movk_i32 s5, 0x12c4 ; MUBUF-NEXT: v_mov_b32_e32 v1, 0x4000 ; MUBUF-NEXT: s_or_b32 s4, s4, 0x12c0 ; MUBUF-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_or_b32_e32 v0, 0x12c4, v1 -; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000 +; MUBUF-NEXT: v_or_b32_e32 v0, s5, v1 ; MUBUF-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v0, s4 -; MUBUF-NEXT: v_or_b32_e32 v2, 0x12cc, v3 +; MUBUF-NEXT: s_movk_i32 s4, 0x12cc +; MUBUF-NEXT: v_mov_b32_e32 v3, 0x4000 +; MUBUF-NEXT: v_or_b32_e32 v2, s4, v3 +; MUBUF-NEXT: s_movk_i32 s4, 0x12c8 ; MUBUF-NEXT: v_mov_b32_e32 v6, 0x4000 ; MUBUF-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) ; MUBUF-NEXT: v_mov_b32_e32 v7, 0x4000 ; MUBUF-NEXT: buffer_load_dword v3, v2, s[0:3], 0 offen glc ; MUBUF-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: v_or_b32_e32 v2, 0x12c8, v6 +; MUBUF-NEXT: v_or_b32_e32 v2, s4, v6 ; MUBUF-NEXT: v_mov_b32_e32 v8, 0x4000 ; MUBUF-NEXT: v_mov_b32_e32 v9, 0x4000 ; MUBUF-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen glc @@ -298,7 +306,8 @@ define amdgpu_kernel void @local_stack_offset_uses_sp_flat(ptr 
addrspace(1) %out ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_cbranch_scc1 .LBB2_1 ; FLATSCR-NEXT: ; %bb.2: ; %split -; FLATSCR-NEXT: s_movk_i32 s0, 0x3000 +; FLATSCR-NEXT: s_movk_i32 s0, 0x1000 +; FLATSCR-NEXT: s_addk_i32 s0, 0x2000 ; FLATSCR-NEXT: scratch_load_dwordx2 v[8:9], off, s0 offset:720 glc ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: scratch_load_dwordx4 v[0:3], off, s0 offset:704 glc diff --git a/llvm/test/CodeGen/AMDGPU/no-folding-imm-to-inst-with-fi.ll b/llvm/test/CodeGen/AMDGPU/no-folding-imm-to-inst-with-fi.ll new file mode 100644 index 0000000..6d0aa1e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/no-folding-imm-to-inst-with-fi.ll @@ -0,0 +1,108 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s + +define protected amdgpu_kernel void @no_folding_imm_to_inst_with_fi(<4 x i64> %val4, <16 x i64> %val16) { +; CHECK-LABEL: no_folding_imm_to_inst_with_fi: +; CHECK: ; %bb.0: ; %bb +; CHECK-NEXT: s_clause 0x2 +; CHECK-NEXT: s_load_b256 s[36:43], s[4:5], 0x24 +; CHECK-NEXT: s_load_b512 s[16:31], s[4:5], 0xe4 +; CHECK-NEXT: s_load_b512 s[0:15], s[4:5], 0xa4 +; CHECK-NEXT: s_mov_b64 s[34:35], src_private_base +; CHECK-NEXT: s_movk_i32 s33, 0x70 +; CHECK-NEXT: s_movk_i32 s34, 0x60 +; CHECK-NEXT: s_or_b32 s44, 0x80, s33 +; CHECK-NEXT: s_mov_b32 s45, s35 +; CHECK-NEXT: s_or_b32 s46, 0x80, s34 +; CHECK-NEXT: s_mov_b32 s47, s35 +; CHECK-NEXT: v_dual_mov_b32 v20, s44 :: v_dual_mov_b32 v21, s45 +; CHECK-NEXT: v_dual_mov_b32 v22, s46 :: v_dual_mov_b32 v23, s47 +; CHECK-NEXT: s_movk_i32 s34, 0x80 +; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; CHECK-NEXT: v_dual_mov_b32 v34, s34 :: v_dual_mov_b32 v35, s35 +; CHECK-NEXT: s_wait_kmcnt 0x0 +; CHECK-NEXT: v_dual_mov_b32 v0, s40 :: v_dual_mov_b32 v1, s41 +; CHECK-NEXT: v_dual_mov_b32 v2, s42 :: v_dual_mov_b32 v3, s43 +; CHECK-NEXT: v_dual_mov_b32 v4, s36 :: v_dual_mov_b32 v5, s37 +; 
CHECK-NEXT: v_dual_mov_b32 v6, s38 :: v_dual_mov_b32 v7, s39 +; CHECK-NEXT: scratch_store_b128 off, v[0:3], off offset:16 scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: v_dual_mov_b32 v0, s20 :: v_dual_mov_b32 v1, s21 +; CHECK-NEXT: s_movk_i32 s20, 0x50 +; CHECK-NEXT: v_dual_mov_b32 v8, s28 :: v_dual_mov_b32 v9, s29 +; CHECK-NEXT: v_dual_mov_b32 v10, s30 :: v_dual_mov_b32 v11, s31 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: s_or_b32 s20, 0x80, s20 +; CHECK-NEXT: s_mov_b32 s21, s35 +; CHECK-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v13, s25 +; CHECK-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v15, s27 +; CHECK-NEXT: v_dual_mov_b32 v2, s22 :: v_dual_mov_b32 v3, s23 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_dual_mov_b32 v25, s21 :: v_dual_mov_b32 v24, s20 +; CHECK-NEXT: scratch_store_b128 off, v[4:7], off scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[20:21], v[8:11] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[22:23], v[12:15] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[24:25], v[0:3] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17 +; CHECK-NEXT: s_or_b32 s16, 0x80, 64 +; CHECK-NEXT: s_mov_b32 s17, s35 +; CHECK-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 +; CHECK-NEXT: s_or_b32 s12, 0x80, 48 +; CHECK-NEXT: s_mov_b32 s13, s35 +; CHECK-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 +; CHECK-NEXT: s_or_b32 s8, 0x80, 32 +; CHECK-NEXT: s_mov_b32 s9, s35 +; CHECK-NEXT: v_dual_mov_b32 v12, s4 :: v_dual_mov_b32 v13, s5 +; CHECK-NEXT: s_or_b32 s4, 0x80, 16 +; CHECK-NEXT: s_mov_b32 s5, s35 +; CHECK-NEXT: v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19 +; CHECK-NEXT: s_wait_alu 0xfffe +; CHECK-NEXT: v_dual_mov_b32 v27, s17 :: v_dual_mov_b32 v26, s16 +; CHECK-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; CHECK-NEXT: 
v_dual_mov_b32 v29, s13 :: v_dual_mov_b32 v28, s12 +; CHECK-NEXT: v_dual_mov_b32 v31, s9 :: v_dual_mov_b32 v30, s8 +; CHECK-NEXT: v_dual_mov_b32 v33, s5 :: v_dual_mov_b32 v32, s4 +; CHECK-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 +; CHECK-NEXT: v_dual_mov_b32 v14, s6 :: v_dual_mov_b32 v15, s7 +; CHECK-NEXT: v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1 +; CHECK-NEXT: v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3 +; CHECK-NEXT: flat_store_b128 v[26:27], v[0:3] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[28:29], v[4:7] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[30:31], v[8:11] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[32:33], v[12:15] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_store_b128 v[34:35], v[16:19] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_storecnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[22:23] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[20:21] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[26:27] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[24:25] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[30:31] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[28:29] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[34:35] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0 +; CHECK-NEXT: flat_load_b128 v[0:3], v[32:33] scope:SCOPE_SYS +; CHECK-NEXT: s_wait_loadcnt 0x0 +; CHECK-NEXT: s_endpgm +bb: + %alloca = alloca <4 x i64>, align 32, addrspace(5) + %alloca1 = alloca <16 x i64>, align 128, addrspace(5) + store volatile <4 x i64> %val4, ptr addrspace(5) %alloca + %ascast = addrspacecast ptr addrspace(5) 
%alloca1 to ptr + store volatile <16 x i64> %val16, ptr %ascast + %load = load volatile <16 x i64>, ptr %ascast + ret void +} |