diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll | 576 |
1 files changed, 539 insertions, 37 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll index 72260e0..6e24a6a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll @@ -1,36 +1,54 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SPREFETCH,SPREFETCH-SDAG %s -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SPREFETCH,SPREFETCH-GISEL %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-SDAG %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,GFX1250,GL2-ONLY %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX1250-SPREFETCH,GFX1250-SPREFETCH-GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 -mattr=+safe-cu-prefetch < %s | FileCheck --check-prefixes=GCN,GFX1250,SAFE-CU %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -mattr=+safe-smem-prefetch < %s | FileCheck --check-prefixes=GCN,SPREFETCH,GFX12-SPREFETCH,SPREFETCH-GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck --check-prefixes=GCN,NOSPREFETCH %s ; Scalar data prefetch define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:512 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr_offset: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x200, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -40,14 +58,20 @@ entry: ; Check large offsets define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr_max_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:8388607 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr_max_offset: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_max_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1) @@ -55,6 +79,20 @@ entry: } define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { +; GFX1250-LABEL: prefetch_data_sgpr_min_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:-8388608 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_min_offset: +; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000) +; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm +; ; NOSPREFETCH-LABEL: prefetch_data_sgpr_min_offset: ; NOSPREFETCH: ; %bb.0: ; %entry ; NOSPREFETCH-NEXT: s_endpgm @@ -68,6 +106,13 @@ define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr ; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-SDAG-NEXT: s_endpgm ; +; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset: +; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm +; ; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_min_offset: ; SPREFETCH-GISEL: ; %bb.0: ; %entry ; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 @@ -81,6 +126,18 @@ entry: } define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) { +; GFX1250-LABEL: prefetch_data_sgpr_too_large_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0x800000 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_data_sgpr_too_large_offset: +; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000 +; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm +; ; NOSPREFETCH-LABEL: prefetch_data_sgpr_too_large_offset: ; NOSPREFETCH: ; %bb.0: ; %entry ; NOSPREFETCH-NEXT: s_endpgm @@ -91,6 +148,13 @@ define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inre ; SPREFETCH-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-SDAG-NEXT: s_endpgm ; +; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset: +; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm +; ; SPREFETCH-GISEL-LABEL: prefetch_data_sgpr_too_large_offset: ; SPREFETCH-GISEL: ; %bb.0: ; %entry ; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 @@ -105,15 +169,113 @@ entry: ; Check divergent address -define amdgpu_ps void @prefetch_data_vgpr(ptr addrspace(1) %ptr) { -; GCN-LABEL: prefetch_data_vgpr: -; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_endpgm +define amdgpu_ps void @prefetch_data_vgpr_global(ptr addrspace(1) %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_global: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_global: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1) ret void } +define amdgpu_ps void @prefetch_data_vgpr_flat(ptr %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_flat: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_global(ptr addrspace(1) inreg %ptr, i32 %offset) { +; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_endpgm +; GFX11-LABEL: prefetch_data_sgpr_vgpr_offset_global: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_endpgm +entry: + %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %offset + tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_sgpr_vgpr_offset_flat(ptr inreg %ptr, i32 %offset) { +; GFX1250-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +; GFX12-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_endpgm +; GFX11-LABEL: prefetch_data_sgpr_vgpr_offset_flat: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_endpgm +entry: + %gep1 = getelementptr i8, ptr %ptr, i32 %offset + %gep2 = getelementptr i8, ptr %gep1, i32 128 + tail call void @llvm.prefetch.pf(ptr %gep2, i32 0, i32 0, i32 1) + ret void +} + ; Check LDS and Scratch, we cannot prefetch it define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) { @@ -137,43 +299,59 @@ entry: ; Check supported address spaces define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr_flat: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr_flat: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr_global: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr_global: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr_global: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_global: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1) ret void } define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_data_sgpr_constant_32bit: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_mov_b32 s1, 0 ; SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_constant_32bit: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1) ret void @@ -182,28 +360,36 @@ entry: ; I$ prefetch define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_inst_sgpr: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_inst_sgpr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_inst_sgpr: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_inst_sgpr: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0) ret void } define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_inst_sgpr_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_inst_sgpr_offset: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_inst_sgpr_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) @@ -213,14 +399,18 @@ entry: ; Check large offsets define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) { -; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset: -; NOSPREFETCH: ; %bb.0: ; %entry -; NOSPREFETCH-NEXT: s_endpgm +; GFX1250-LABEL: prefetch_inst_sgpr_max_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm ; ; SPREFETCH-LABEL: prefetch_inst_sgpr_max_offset: ; SPREFETCH: ; %bb.0: ; %entry ; SPREFETCH-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0 ; SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_inst_sgpr_max_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm entry: %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0) @@ -228,6 +418,18 @@ entry: } define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) { +; GFX1250-LABEL: prefetch_inst_sgpr_min_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_min_offset: +; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-SDAG-NEXT: s_mov_b64 s[2:3], lit64(0xffffffffff800000) +; GFX1250-SPREFETCH-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm +; ; NOSPREFETCH-LABEL: prefetch_inst_sgpr_min_offset: ; NOSPREFETCH: ; %bb.0: ; %entry ; NOSPREFETCH-NEXT: s_endpgm @@ -241,6 +443,13 @@ define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr ; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 ; SPREFETCH-SDAG-NEXT: s_endpgm ; +; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset: +; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1 +; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm +; ; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_min_offset: ; SPREFETCH-GISEL: ; %bb.0: ; %entry ; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000 @@ -254,6 +463,16 @@ entry: } define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) { +; GFX1250-LABEL: prefetch_inst_sgpr_too_large_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset: +; GFX1250-SPREFETCH-SDAG: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000 +; GFX1250-SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-SDAG-NEXT: s_endpgm +; ; NOSPREFETCH-LABEL: prefetch_inst_sgpr_too_large_offset: ; NOSPREFETCH: ; %bb.0: ; %entry ; NOSPREFETCH-NEXT: s_endpgm @@ -264,6 +483,13 @@ define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inre ; SPREFETCH-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 ; SPREFETCH-SDAG-NEXT: s_endpgm ; +; GFX1250-SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset: +; GFX1250-SPREFETCH-GISEL: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 +; GFX1250-SPREFETCH-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0 +; GFX1250-SPREFETCH-GISEL-NEXT: s_endpgm +; ; SPREFETCH-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset: ; SPREFETCH-GISEL: ; %bb.0: ; %entry ; SPREFETCH-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000 @@ -276,6 +502,282 @@ entry: ret void } +; Check cache locality + +define amdgpu_ps void @prefetch_data_vgpr_flat_dev(ptr %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_flat_dev: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_DEV +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_dev: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_dev: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 1, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_vgpr_flat_se(ptr %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_flat_se: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_se: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_se: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 2, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_vgpr_flat_cu(ptr %ptr) { +; GL2-ONLY-LABEL: prefetch_data_vgpr_flat_cu: +; GL2-ONLY: ; %bb.0: ; %entry +; GL2-ONLY-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE +; GL2-ONLY-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] scope:SCOPE_SE +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; SAFE-CU-LABEL: prefetch_data_vgpr_flat_cu: +; SAFE-CU: ; %bb.0: ; %entry +; SAFE-CU-NEXT: flat_prefetch_b8 v[0:1] +; SAFE-CU-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_cu: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_cu: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 3, i32 1) + ret void +} + +; flat offset + +define amdgpu_ps void @prefetch_data_vgpr_flat_offset(ptr %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_flat_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v[0:1] offset:512 scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_flat_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_flat_offset: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + %gep = getelementptr float, ptr %ptr, i32 128 + tail call void @llvm.prefetch.pf(ptr %gep, i32 0, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_vgpr_global_offset(ptr addrspace(1) %ptr) { +; GFX1250-LABEL: prefetch_data_vgpr_global_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v[0:1], off offset:512 scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_offset: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 128 + tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_vgpr_global_saddr(ptr addrspace(1) inreg %ptr, i32 %voffset) { +; GFX1250-LABEL: prefetch_data_vgpr_global_saddr: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset + tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 0, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_vgpr_global_saddr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { +; GFX1250-LABEL: prefetch_data_vgpr_global_saddr_offset: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:128 scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_vgpr_global_saddr_offset: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + %gep1 = getelementptr i8, ptr addrspace(1) %ptr, i32 %voffset + %gep2 = getelementptr i8, ptr addrspace(1) %gep1, i32 128 + tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep2, i32 0, i32 0, i32 1) + ret void +} + +; Cannot prefetch I$ with flat or global instructions. + +define amdgpu_ps void @prefetch_inst_vgpr_global(ptr addrspace(1) %ptr) { +; GCN-LABEL: prefetch_inst_vgpr_global: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 0) + ret void +} + +define amdgpu_ps void @prefetch_inst_vgpr_flat(ptr %ptr) { +; GCN-LABEL: prefetch_inst_vgpr_flat: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 0) + ret void +} + +; Force vector prefetch for uniform address with rw = 1 argument. + +define amdgpu_ps void @prefetch_data_sgpr_flat_force_vector(ptr inreg %ptr) { +; GFX1250-LABEL: prefetch_data_sgpr_flat_force_vector: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SPREFETCH-NEXT: flat_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_flat_force_vector: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.pf(ptr %ptr, i32 1, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_sgpr_global_force_vector(ptr addrspace(1) inreg %ptr) { +; GFX1250-LABEL: prefetch_data_sgpr_global_force_vector: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_force_vector: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x0, null, 0 +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 1, i32 0, i32 1) + ret void +} + +define amdgpu_ps void @prefetch_data_sgpr_global_saddr_force_vector(ptr addrspace(1) inreg %ptr) { +; GFX1250-LABEL: prefetch_data_sgpr_global_saddr_force_vector: +; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS +; GFX1250-NEXT: s_endpgm +; +; GFX1250-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector: +; GFX1250-SPREFETCH: ; %bb.0: ; %entry +; GFX1250-SPREFETCH-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-SPREFETCH-NEXT: global_prefetch_b8 v0, s[0:1] offset:1024 scope:SCOPE_SYS +; GFX1250-SPREFETCH-NEXT: s_endpgm +; +; NOSPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector: +; NOSPREFETCH: ; %bb.0: ; %entry +; NOSPREFETCH-NEXT: s_endpgm +; +; GFX12-SPREFETCH-LABEL: prefetch_data_sgpr_global_saddr_force_vector: +; GFX12-SPREFETCH: ; %bb.0: ; %entry +; GFX12-SPREFETCH-NEXT: s_prefetch_data s[0:1], 0x400, null, 0 +; GFX12-SPREFETCH-NEXT: s_endpgm +entry: + %gep = getelementptr i8, ptr addrspace(1) %ptr, i32 1024 + tail call void @llvm.prefetch.p1(ptr addrspace(1) %gep, i32 1, i32 0, i32 1) + ret void +} + declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32) declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32) declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32) |