aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/fract-match.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fract-match.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/fract-match.ll428
1 files changed, 428 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index f6ee007..80b4d64 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -14,6 +14,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s
; Test patterns to match v_fract_* instructions.
@@ -103,6 +104,21 @@ define float @safe_math_fract_f32(float %x, ptr addrspace(1) nocapture writeonly
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v3, v0
+; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT: v_floor_f32_e32 v4, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX12-NEXT: global_store_b32 v[1:2], v4, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -181,6 +197,18 @@ define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) nocaptu
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f32_noinf_check:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v3, v0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[1:2], v3, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -263,6 +291,22 @@ define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) nocapture w
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: no_nan_check_math_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v3, v0
+; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3
+; GFX12-NEXT: global_store_b32 v[1:2], v3, off
+; GFX12-NEXT: v_min_num_f32_e32 v4, 0x3f7fffff, v4
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -314,6 +358,16 @@ define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f32_nonans:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -362,6 +416,19 @@ define float @basic_fract_f32_flags_minnum(float %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f32_flags_minnum:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -409,6 +476,16 @@ define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f32_flags_fsub:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub nsz float %x, %floor
@@ -467,6 +544,17 @@ define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_v2f32_nonans:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: v_fract_f32_e32 v1, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub = fsub <2 x float> %x, %floor
@@ -540,6 +628,20 @@ define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f32_multi_use_fsub_nonans:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v3, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v3, v0, v3
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: global_store_b32 v[1:2], v3, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -588,6 +690,16 @@ define float @nnan_minnum_fract_f32(float %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: nnan_minnum_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -638,6 +750,19 @@ define float @nnan_fsub_fract_f32(float %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: nnan_fsub_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub nnan float %x, %floor
@@ -686,6 +811,19 @@ define float @nnan_floor_fract_f32(float %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: nnan_floor_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call nnan float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -733,6 +871,16 @@ define float @nnan_src_fract_f32(float nofpclass(nan) %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: nnan_src_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -782,6 +930,19 @@ define float @not_fract_f32_wrong_const(float nofpclass(nan) %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: not_fract_f32_wrong_const:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7ffffe, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -831,6 +992,19 @@ define float @not_fract_f32_swapped_fsub(float nofpclass(nan) %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v1, v0
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: not_fract_f32_swapped_fsub:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v1, v0
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %floor, %x
@@ -880,6 +1054,19 @@ define float @not_fract_f32_not_floor(float nofpclass(nan) %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: not_fract_f32_not_floor:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_trunc_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.trunc.f32(float %x)
%sub = fsub float %x, %floor
@@ -929,6 +1116,19 @@ define float @not_fract_f32_different_floor(float %x, float %y) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: not_fract_f32_different_floor:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %y)
%sub = fsub float %x, %floor
@@ -978,6 +1178,19 @@ define float @not_fract_f32_maxnum(float nofpclass(nan) %x) {
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: not_fract_f32_maxnum:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX12-NEXT: v_max_num_f32_e32 v0, 0x3f7fffff, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1000,6 +1213,15 @@ define float @fcmp_uno_check_is_nan_f32(float %x) {
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: fcmp_uno_check_is_nan_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1054,6 +1276,16 @@ define float @select_nan_fract_f32(float %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: select_nan_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1107,6 +1339,16 @@ define float @commuted_select_nan_fract_f32(float %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: commuted_select_nan_fract_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1168,6 +1410,22 @@ define float @wrong_commuted_nan_select_f32(float %x) {
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: wrong_commuted_nan_select_f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f32_e32 v1, v0
+; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_sub_f32_e32 v1, v0, v1
+; GFX12-NEXT: v_min_num_f32_e32 v1, 0x3f7fffff, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1231,6 +1489,16 @@ define half @basic_fract_f16_nonan(half nofpclass(nan) %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f16_nonan:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f16_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor
@@ -1313,6 +1581,20 @@ define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) {
; GFX11-NEXT: v_fract_f16_e32 v1, v1
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_v2f16_nonan:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX12-NEXT: v_fract_f16_e32 v0, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_fract_f16_e32 v1, v1
+; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
%sub = fsub <2 x half> %x, %floor
@@ -1369,6 +1651,16 @@ define double @basic_fract_f64_nanans(double nofpclass(nan) %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: basic_fract_f64_nanans:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor
@@ -1461,6 +1753,18 @@ define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) nocapture
; GFX11-NEXT: v_fract_f16_e32 v0, v0
; GFX11-NEXT: global_store_b16 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f16_noinf_check:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f16_e32 v3, v0
+; GFX12-NEXT: v_fract_f16_e32 v0, v0
+; GFX12-NEXT: global_store_b16 v[1:2], v3, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor
@@ -1546,6 +1850,18 @@ define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) nocap
; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f64_noinf_check:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_floor_f64_e32 v[4:5], v[0:1]
+; GFX12-NEXT: v_fract_f64_e32 v[0:1], v[0:1]
+; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor
@@ -1600,6 +1916,16 @@ define float @select_nan_fract_f32_flags_select(float %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: select_nan_fract_f32_flags_select:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1653,6 +1979,16 @@ define float @select_nan_fract_f32_flags_minnum(float %x) {
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: select_nan_fract_f32_flags_minnum:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v0, v0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call float @llvm.floor.f32(float %x)
%sub = fsub float %x, %floor
@@ -1769,6 +2105,25 @@ define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) nocap
; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off
; GFX11-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_v2f32:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f32_e32 v6, v0
+; GFX12-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204
+; GFX12-NEXT: v_fract_f32_e32 v7, v1
+; GFX12-NEXT: v_floor_f32_e32 v4, v0
+; GFX12-NEXT: v_floor_f32_e32 v5, v1
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX12-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
+; GFX12-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204
+; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off
+; GFX12-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
%sub = fsub <2 x float> %x, %floor
@@ -1881,6 +2236,21 @@ define double @safe_math_fract_f64(double %x, ptr addrspace(1) nocapture writeon
; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f64:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f64_e32 v[4:5], v[0:1]
+; GFX12-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]|
+; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5
+; GFX12-NEXT: global_store_b64 v[2:3], v[6:7], off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call double @llvm.floor.f64(double %x)
%sub = fsub double %x, %floor
@@ -2002,6 +2372,21 @@ define half @safe_math_fract_f16(half %x, ptr addrspace(1) nocapture writeonly %
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b16 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_f16:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f16_e32 v3, v0
+; GFX12-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0|
+; GFX12-NEXT: v_floor_f16_e32 v4, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
+; GFX12-NEXT: global_store_b16 v[1:2], v4, off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call half @llvm.floor.f16(half %x)
%sub = fsub half %x, %floor
@@ -2168,6 +2553,29 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_v2f16:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0
+; GFX12-NEXT: v_fract_f16_e32 v6, v0
+; GFX12-NEXT: v_floor_f16_e32 v5, v0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_fract_f16_e32 v4, v3
+; GFX12-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204
+; GFX12-NEXT: v_floor_f16_e32 v7, v3
+; GFX12-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0
+; GFX12-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: v_pack_b32_f16 v4, v5, v7
+; GFX12-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0
+; GFX12-NEXT: global_store_b32 v[1:2], v4, off
+; GFX12-NEXT: v_pack_b32_f16 v0, v0, v3
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x)
%sub = fsub <2 x half> %x, %floor
@@ -2311,6 +2719,26 @@ define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) noc
; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1
; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX12-LABEL: safe_math_fract_v2f64:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: s_wait_expcnt 0x0
+; GFX12-NEXT: s_wait_samplecnt 0x0
+; GFX12-NEXT: s_wait_bvhcnt 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_fract_f64_e32 v[10:11], v[0:1]
+; GFX12-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204
+; GFX12-NEXT: v_fract_f64_e32 v[12:13], v[2:3]
+; GFX12-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204
+; GFX12-NEXT: v_floor_f64_e32 v[8:9], v[2:3]
+; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1]
+; GFX12-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0
+; GFX12-NEXT: v_cndmask_b32_e64 v1, v11, 0, s0
+; GFX12-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1
+; GFX12-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1
+; GFX12-NEXT: global_store_b128 v[4:5], v[6:9], off
+; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
%floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x)
%sub = fsub <2 x double> %x, %floor