diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/flat-scratch.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index b25d9b2..fc88839 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -3621,7 +3621,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-NEXT: s_mov_b32 s0, 0 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_movk_i32 s0, 0x3004 +; GFX9-NEXT: s_movk_i32 s0, 0x3000 +; GFX9-NEXT: s_add_i32 s0, s0, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3637,7 +3638,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9 ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_movk_i32 s0, 0x3804 +; GFX10-NEXT: s_movk_i32 s0, 0x3800 +; GFX10-NEXT: s_add_i32 s0, s0, 4 ; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3682,7 +3684,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s13, 0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3004 +; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3716,8 +3719,9 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX1010-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1010-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3800 ; GFX1010-PAL-NEXT: s_mov_b32 s1, 0 -; GFX1010-PAL-NEXT: s_movk_i32 s0, 0x3804 +; GFX1010-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1010-PAL-NEXT: scratch_store_dword off, v0, s1 offset:4 ; GFX1010-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3739,7 +3743,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX1030-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v0, 13 ; GFX1030-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3804 +; GFX1030-PAL-NEXT: s_movk_i32 s0, 0x3800 +; GFX1030-PAL-NEXT: s_add_i32 s0, s0, 4 ; GFX1030-PAL-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX1030-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1030-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3785,10 +3790,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-NEXT: s_add_i32 s1, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_i32 s0, s32, 0x3004 +; GFX9-NEXT: s_add_i32 s0, s1, 4 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -3800,8 +3807,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10: ; %bb.0: ; %bb ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-NEXT: s_movk_i32 s0, 0x3800 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-NEXT: s_add_i32 s0, s32, 0x3804 +; GFX10-NEXT: s_add_i32 s1, s32, s0 +; GFX10-NEXT: s_add_i32 s0, s1, 4 ; GFX10-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 offset:1664 @@ -3843,10 +3852,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-PAL-LABEL: store_load_large_imm_offset_foo: ; GFX9-PAL: ; %bb.0: ; %bb ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-PAL-NEXT: s_movk_i32 s0, 0x3000 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 13 +; GFX9-PAL-NEXT: s_add_i32 s1, s32, s0 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX9-PAL-NEXT: s_add_i32 s0, s32, 0x3004 +; GFX9-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX9-PAL-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-PAL-NEXT: scratch_store_dword off, v0, s0 offset:3712 ; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) @@ -3872,8 +3883,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX10-PAL: ; %bb.0: ; %bb ; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-PAL-NEXT: v_mov_b32_e32 v0, 13 +; GFX10-PAL-NEXT: s_movk_i32 s0, 0x3800 ; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 -; GFX10-PAL-NEXT: s_add_i32 s0, s32, 0x3804 +; GFX10-PAL-NEXT: s_add_i32 s1, s32, s0 +; GFX10-PAL-NEXT: s_add_i32 s0, s1, 4 ; GFX10-PAL-NEXT: scratch_store_dword off, v0, s32 offset:4 ; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-PAL-NEXT: scratch_store_dword off, v1, s0 offset:1664 |