diff options
author | Jay Foad <jay.foad@amd.com> | 2024-04-17 14:58:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-17 14:58:13 +0100 |
commit | 856d1c44103f09f2ed0448001de9dcda63055733 (patch) | |
tree | e39f8784d215cbd7b7ca09d4244d84b640ac13d7 | |
parent | 41b7341d6b27adf81262a5a0bd4e430675b73bbb (diff) | |
download | llvm-856d1c44103f09f2ed0448001de9dcda63055733.zip llvm-856d1c44103f09f2ed0448001de9dcda63055733.tar.gz llvm-856d1c44103f09f2ed0448001de9dcda63055733.tar.bz2 |
[AMDGPU] Fix predicates for BUFFER_ATOMIC_FMIN/FMAX patterns (#89066)
Use OtherPredicates to avoid interfering with other uses of
SubtargetPredicate for GFX12.
-rw-r--r-- | llvm/lib/Target/AMDGPU/BUFInstructions.td | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll | 72 |
2 files changed, 73 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 273f92a..8053d89 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -1726,7 +1726,7 @@ let SubtargetPredicate = isGFX12Plus in { defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>; } -let SubtargetPredicate = isGFX6GFX7GFX10Plus in { +let OtherPredicates = [isGFX6GFX7GFX10Plus] in { defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">; defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">; } diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll index 0c62b52..587340c 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll @@ -4,12 +4,14 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100 +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100 +; RUN: llc < %s -global-isel 
-mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12 declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg) declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg) @@ -70,6 +72,18 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_min_noret_f32: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34 +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_min_noret_f32: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd @@ -170,6 +184,15 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, floa ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v0, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc @@ -292,6 +315,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre ; GFX1100-NEXT: ds_store_b32 v1, v0 ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_clause 0x1 +; 
GFX12-NEXT: s_load_b96 s[4:6], s[0:1], 0x34 +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX12-NEXT: s_mov_b32 s4, 4 +; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN +; GFX12-NEXT: v_mov_b32_e32 v1, s6 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: ds_store_b32 v1, v0 +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -427,6 +464,18 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_max_noret_f32: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_clause 0x1 +; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34 +; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_max_noret_f32: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd @@ -527,6 +576,15 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, floa ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v0, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: buffer_atomic_fmax v0, v1, 
s[0:3], 0 offen glc @@ -641,6 +699,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inre ; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX1100-NEXT: s_endpgm ; +; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc: +; GFX12: ; %bb.0: ; %main_body +; GFX12-NEXT: s_load_b256 s[0:7], s[0:1], 0x24 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GFX12-NEXT: s_mov_b32 s4, 4 +; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN +; GFX12-NEXT: v_mov_b32_e32 v1, 0 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v1, v0, s[6:7] +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; ; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc: ; G_SI: ; %bb.0: ; %main_body ; G_SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 |