about summary refs log tree commit diff
path: root/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll 125
1 files changed, 14 insertions, 111 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 4e94a64..66b22be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -20,7 +20,6 @@ declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr,
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
@@ -1923,54 +1922,6 @@ main_body:
ret double %ret
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) {
-; GFX90A-LABEL: local_atomic_fadd_f64_noret:
-; GFX90A: ; %bb.0: ; %main_body
-; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x24
-; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v2, s4
-; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
-; GFX90A-NEXT: s_endpgm
-;
-; GFX940-LABEL: local_atomic_fadd_f64_noret:
-; GFX940: ; %bb.0: ; %main_body
-; GFX940-NEXT: s_load_dword s4, s[0:1], 0x24
-; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v2, s4
-; GFX940-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
-; GFX940-NEXT: ds_add_f64 v2, v[0:1]
-; GFX940-NEXT: s_endpgm
-main_body:
- %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
- ret void
-}
-
-define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) {
-; GFX90A-LABEL: local_atomic_fadd_f64_rtn:
-; GFX90A: ; %bb.0: ; %main_body
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn:
-; GFX940: ; %bb.0: ; %main_body
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v4, v1
-; GFX940-NEXT: v_mov_b32_e32 v5, v2
-; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-main_body:
- %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
- ret double %ret
-}
-
define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
; GFX90A: ; %bb.0: ; %main_body
@@ -1980,7 +1931,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX90A-NEXT: s_cbranch_execz .LBB65_2
+; GFX90A-NEXT: s_cbranch_execz .LBB63_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX90A-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -1990,7 +1941,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX90A-NEXT: v_mov_b32_e32 v2, s0
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: .LBB65_2:
+; GFX90A-NEXT: .LBB63_2:
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat:
@@ -2001,7 +1952,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX940-NEXT: s_cbranch_execz .LBB65_2
+; GFX940-NEXT: s_cbranch_execz .LBB63_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -2011,7 +1962,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr
; GFX940-NEXT: v_mov_b32_e32 v2, s0
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: .LBB65_2:
+; GFX940-NEXT: .LBB63_2:
; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
@@ -2027,7 +1978,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX90A-NEXT: s_cbranch_execz .LBB66_2
+; GFX90A-NEXT: s_cbranch_execz .LBB64_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX90A-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -2037,7 +1988,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX90A-NEXT: v_mov_b32_e32 v2, s0
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: .LBB66_2:
+; GFX90A-NEXT: .LBB64_2:
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush:
@@ -2048,7 +1999,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX940-NEXT: s_cbranch_execz .LBB66_2
+; GFX940-NEXT: s_cbranch_execz .LBB64_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -2058,14 +2009,14 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3
; GFX940-NEXT: v_mov_b32_e32 v2, s0
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: .LBB66_2:
+; GFX940-NEXT: .LBB64_2:
; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
ret void
}
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #2 {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
; GFX90A: ; %bb.0: ; %main_body
; GFX90A-NEXT: s_mov_b64 s[2:3], exec
@@ -2074,7 +2025,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX90A-NEXT: s_cbranch_execz .LBB67_2
+; GFX90A-NEXT: s_cbranch_execz .LBB65_2
; GFX90A-NEXT: ; %bb.1:
; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX90A-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -2084,7 +2035,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX90A-NEXT: v_mov_b32_e32 v2, s0
; GFX90A-NEXT: ds_add_f64 v2, v[0:1]
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: .LBB67_2:
+; GFX90A-NEXT: .LBB65_2:
; GFX90A-NEXT: s_endpgm
;
; GFX940-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
@@ -2095,7 +2046,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX940-NEXT: v_mbcnt_hi_u32_b32 v0, s4, v0
; GFX940-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX940-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX940-NEXT: s_cbranch_execz .LBB67_2
+; GFX940-NEXT: s_cbranch_execz .LBB65_2
; GFX940-NEXT: ; %bb.1:
; GFX940-NEXT: s_load_dword s0, s[0:1], 0x24
; GFX940-NEXT: s_bcnt1_i32_b64 s1, s[2:3]
@@ -2105,7 +2056,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrsp
; GFX940-NEXT: v_mov_b32_e32 v2, s0
; GFX940-NEXT: ds_add_f64 v2, v[0:1]
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: .LBB67_2:
+; GFX940-NEXT: .LBB65_2:
; GFX940-NEXT: s_endpgm
main_body:
%ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst
@@ -2134,54 +2085,6 @@ main_body:
ret double %ret
}
-define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 {
-; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
-; GFX90A: ; %bb.0: ; %main_body
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
-; GFX940: ; %bb.0: ; %main_body
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v4, v1
-; GFX940-NEXT: v_mov_b32_e32 v5, v2
-; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-main_body:
- %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
- ret double %ret
-}
-
-define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 {
-; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
-; GFX90A: ; %bb.0: ; %main_body
-; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX90A-NEXT: v_mov_b32_e32 v4, v1
-; GFX90A-NEXT: v_mov_b32_e32 v5, v2
-; GFX90A-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX940-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
-; GFX940: ; %bb.0: ; %main_body
-; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX940-NEXT: v_mov_b32_e32 v4, v1
-; GFX940-NEXT: v_mov_b32_e32 v5, v2
-; GFX940-NEXT: ds_add_rtn_f64 v[0:1], v0, v[4:5]
-; GFX940-NEXT: s_waitcnt lgkmcnt(0)
-; GFX940-NEXT: s_setpc_b64 s[30:31]
-main_body:
- %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0)
- ret double %ret
-}
-
attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
-attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
-attributes #3 = { "denormal-fp-math"="ieee,ieee" }
-attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" }