about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jay Foad <jay.foad@amd.com> 2024-04-17 14:58:13 +0100
committer: GitHub <noreply@github.com> 2024-04-17 14:58:13 +0100
commit: 856d1c44103f09f2ed0448001de9dcda63055733 (patch)
tree: e39f8784d215cbd7b7ca09d4244d84b640ac13d7
parent: 41b7341d6b27adf81262a5a0bd4e430675b73bbb (diff)
download: llvm-856d1c44103f09f2ed0448001de9dcda63055733.zip
llvm-856d1c44103f09f2ed0448001de9dcda63055733.tar.gz
llvm-856d1c44103f09f2ed0448001de9dcda63055733.tar.bz2
[AMDGPU] Fix predicates for BUFFER_ATOMIC_FMIN/FMAX patterns (#89066)
Use OtherPredicates to avoid interfering with other uses of SubtargetPredicate for GFX12.
-rw-r--r-- llvm/lib/Target/AMDGPU/BUFInstructions.td 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll 72
2 files changed, 73 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index 273f92a..8053d89 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1726,7 +1726,7 @@ let SubtargetPredicate = isGFX12Plus in {
defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["noret"]>;
}
-let SubtargetPredicate = isGFX6GFX7GFX10Plus in {
+let OtherPredicates = [isGFX6GFX7GFX10Plus] in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f32, "BUFFER_ATOMIC_FMIN">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f32, "BUFFER_ATOMIC_FMAX">;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
index 0c62b52..587340c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
@@ -4,12 +4,14 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1030
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=GFX1100
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -check-prefix=G_SI
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX7
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
+; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12
declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
@@ -70,6 +72,18 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_min_noret_f32:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_min_noret_f32:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -170,6 +184,15 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f32(<4 x i32> inreg %rsrc, floa
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v[0:1], v0, off
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: buffer_atomic_fmin v0, v1, s[0:3], 0 offen glc
@@ -292,6 +315,20 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
; GFX1100-NEXT: ds_store_b32 v1, v0
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b96 s[4:6], s[0:1], 0x34
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX12-NEXT: s_mov_b32 s4, 4
+; GFX12-NEXT: buffer_atomic_min_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
+; GFX12-NEXT: v_mov_b32_e32 v1, s6
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: ds_store_b32 v1, v0
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
@@ -427,6 +464,18 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f32(<4 x i32> inreg %rsrc
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_max_noret_f32:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b64 s[4:5], s[0:1], 0x34
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_max_noret_f32:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -527,6 +576,15 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f32(<4 x i32> inreg %rsrc, floa
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v[0:1], v0, off
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: buffer_atomic_fmax v0, v1, s[0:3], 0 offen glc
@@ -641,6 +699,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f32_off4_slc(<4 x i32> inre
; GFX1100-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX1100-NEXT: s_endpgm
;
+; GFX12-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
+; GFX12: ; %bb.0: ; %main_body
+; GFX12-NEXT: s_load_b256 s[0:7], s[0:1], 0x24
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GFX12-NEXT: s_mov_b32 s4, 4
+; GFX12-NEXT: buffer_atomic_max_num_f32 v0, v1, s[0:3], s4 offen th:TH_ATOMIC_NT_RETURN
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v1, v0, s[6:7]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f32_off4_slc:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9