diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/flat-scratch.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 54 |
1 files changed, 27 insertions, 27 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index b5e579b..b25d9b2 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -714,10 +714,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) { ; GFX11-LABEL: store_load_vindex_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-NEXT: v_mov_b32_e32 v2, 15 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshl_b32 s0, s0, 7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -732,9 +732,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) { ; GFX12-LABEL: store_load_vindex_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_lshl_b32 s0, s0, 7 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -769,8 +769,8 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) { ; GFX942-LABEL: store_load_vindex_kernel: ; GFX942: ; %bb.0: ; %bb ; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 -; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 15 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_lshl_b32 s0, s0, 7 @@ -809,10 +809,10 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) { ; GFX11-PAL-LABEL: store_load_vindex_kernel: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 15 ; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7 ; GFX11-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -827,9 +827,9 @@ define amdgpu_kernel void @store_load_vindex_kernel(i32 %n) { ; GFX12-PAL-LABEL: store_load_vindex_kernel: ; GFX12-PAL: ; %bb.0: ; %bb ; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-PAL-NEXT: s_wait_kmcnt 0x0 ; GFX12-PAL-NEXT: s_lshl_b32 s0, s0, 7 ; GFX12-PAL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) @@ -1958,10 +1958,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) { ; GFX11-LABEL: store_load_vindex_small_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: scratch_load_b32 v3, off, off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) @@ -1976,10 +1976,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) { ; GFX12-LABEL: store_load_vindex_small_offset_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 @@ -2021,8 +2021,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) { ; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX942-NEXT: scratch_load_dword v1, off, off sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 15 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_lshl_b32 s0, s0, 7 @@ -2092,10 +2092,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) { ; GFX11-PAL-LABEL: store_load_vindex_small_offset_kernel: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 dlc ; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0) @@ -2110,10 +2110,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel(i32 %n) { ; GFX12-PAL-LABEL: store_load_vindex_small_offset_kernel: ; GFX12-PAL: ; %bb.0: ; %bb ; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 -; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-PAL-NEXT: s_wait_kmcnt 0x0 ; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:384 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_storecnt 0x0 @@ -3254,10 +3254,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) { ; GFX11-LABEL: store_load_vindex_large_offset_kernel: ; GFX11: ; %bb.0: ; %bb ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshl_b32 s0, s0, 7 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -3274,10 +3274,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) { ; GFX12-LABEL: store_load_vindex_large_offset_kernel: ; GFX12: ; %bb.0: ; %bb ; GFX12-NEXT: s_load_b32 s0, s[4:5], 0x24 -; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS ; GFX12-NEXT: s_wait_storecnt 0x0 @@ -3319,8 +3319,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) { ; GFX942-NEXT: s_load_dword s0, s[4:5], 0x24 ; GFX942-NEXT: scratch_load_dword v1, off, off offset:4 sc0 sc1 ; GFX942-NEXT: s_waitcnt vmcnt(0) -; GFX942-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GFX942-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX942-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX942-NEXT: v_mov_b32_e32 v1, 15 ; GFX942-NEXT: s_waitcnt lgkmcnt(0) ; GFX942-NEXT: s_lshl_b32 s0, s0, 7 @@ -3391,10 +3391,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) { ; GFX11-PAL-LABEL: store_load_vindex_large_offset_kernel: ; GFX11-PAL: ; %bb.0: ; %bb ; GFX11-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX11-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-PAL-NEXT: scratch_load_b32 v3, off, off offset:4 glc dlc ; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) -; GFX11-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX11-PAL-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-PAL-NEXT: s_lshl_b32 s0, s0, 7 ; GFX11-PAL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) @@ -3411,10 +3411,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel(i32 %n) { ; GFX12-PAL-LABEL: store_load_vindex_large_offset_kernel: ; GFX12-PAL: ; %bb.0: ; %bb ; GFX12-PAL-NEXT: s_load_b32 s0, s[4:5], 0x0 -; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_and_b32 v0, 0x3ff, v0 +; GFX12-PAL-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX12-PAL-NEXT: scratch_load_b32 v3, off, off scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 -; GFX12-PAL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX12-PAL-NEXT: v_and_b32_e32 v0, 0xffc, v0 ; GFX12-PAL-NEXT: s_wait_kmcnt 0x0 ; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, off offset:16512 scope:SCOPE_SYS ; GFX12-PAL-NEXT: s_wait_storecnt 0x0 |