diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll | 249 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/lasx/build-vector.ll | 215 | ||||
-rw-r--r-- | llvm/test/CodeGen/LoongArch/lsx/build-vector.ll | 74 | ||||
-rw-r--r-- | llvm/test/CodeGen/Mips/abiflags-soft-float.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll | 58 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/isel-fpclass.ll | 526 |
7 files changed, 962 insertions, 188 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll index fb1e46d..ea9334a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll @@ -1,13 +1,100 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GCN,GFX942 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-SDAG-STRESS %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-GISEL-STRESS %s declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32) declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32) define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 { +; GFX942-SDAG-LABEL: test_mfma_f32_16x16x8xf32: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 1.0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 2.0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-SDAG-NEXT: s_nop 1 +; GFX942-SDAG-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[4:5], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-SDAG-NEXT: s_nop 6 +; GFX942-SDAG-NEXT: global_store_dwordx4 v2, a[0:3], s[6:7] +; GFX942-SDAG-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: test_mfma_f32_16x16x8xf32: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0 +; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0 +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] +; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x40400000 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; GFX942-GISEL-NEXT: s_mov_b32 s5, 4.0 +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5] +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-GISEL-NEXT: s_nop 1 +; GFX942-GISEL-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-GISEL-NEXT: s_nop 5 +; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_16x16x8xf32: +; GFX942-SDAG-STRESS: ; %bb.0: ; %bb +; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 +; 
GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v4, 0 +; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-SDAG-STRESS-NEXT: s_nop 1 +; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-SDAG-STRESS-NEXT: s_nop 6 +; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7] +; GFX942-SDAG-STRESS-NEXT: s_endpgm +; +; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_16x16x8xf32: +; GFX942-GISEL-STRESS: ; %bb.0: ; %bb +; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s2, 0x40400000 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s3, 4.0 +; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0 +; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-GISEL-STRESS-NEXT: s_nop 1 +; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3 +; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-GISEL-STRESS-NEXT: s_nop 5 +; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7] +; GFX942-GISEL-STRESS-NEXT: s_endpgm bb: %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> 
@llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3) @@ -16,6 +103,157 @@ bb: } define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 { +; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32: +; GFX942-SDAG: ; %bb.0: ; %bb +; GFX942-SDAG-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 1.0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 2.0 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 +; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, s4 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, s5 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, s6 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, s7 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, s8 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, s9 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, s10 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, s11 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, s12 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, s13 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a14, s14 +; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a15, s15 +; GFX942-SDAG-NEXT: s_nop 1 +; GFX942-SDAG-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-SDAG-NEXT: s_nop 7 +; GFX942-SDAG-NEXT: s_nop 1 +; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[0:3], 
s[16:17] +; GFX942-SDAG-NEXT: s_endpgm +; +; GFX942-GISEL-LABEL: test_mfma_f32_32x32x4xf32: +; GFX942-GISEL: ; %bb.0: ; %bb +; GFX942-GISEL-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 +; GFX942-GISEL-NEXT: s_mov_b32 s18, 1.0 +; GFX942-GISEL-NEXT: s_mov_b32 s19, 2.0 +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[18:19] +; GFX942-GISEL-NEXT: s_mov_b32 s18, 0x40400000 +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 +; GFX942-GISEL-NEXT: s_mov_b32 s19, 4.0 +; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19] +; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a4, s4 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a5, s5 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a6, s6 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a7, s7 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a8, s8 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a9, s9 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a10, s10 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a11, s11 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a12, s12 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a13, s13 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a14, s14 +; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a15, s15 +; GFX942-GISEL-NEXT: s_nop 1 +; GFX942-GISEL-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-GISEL-NEXT: s_nop 7 +; GFX942-GISEL-NEXT: s_nop 1 +; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] +; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-GISEL-NEXT: s_endpgm +; +; GFX942-SDAG-STRESS-LABEL: 
test_mfma_f32_32x32x4xf32: +; GFX942-SDAG-STRESS: ; %bb.0: ; %bb +; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0 +; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 +; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 +; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 +; GFX942-SDAG-STRESS-NEXT: s_nop 1 +; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-SDAG-STRESS-NEXT: s_nop 7 +; GFX942-SDAG-STRESS-NEXT: s_nop 1 +; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] +; GFX942-SDAG-STRESS-NEXT: 
s_endpgm +; +; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_32x32x4xf32: +; GFX942-GISEL-STRESS: ; %bb.0: ; %bb +; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24 +; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0 +; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0) +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a4, s4 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a5, s5 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a6, s6 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a7, s7 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a8, s8 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a9, s9 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a10, s10 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a11, s11 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a12, s12 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a13, s13 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a14, s14 +; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a15, s15 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0 +; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 0x40400000 +; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 4.0 +; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[0:1] +; GFX942-GISEL-STRESS-NEXT: s_nop 1 +; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3 +; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0 +; GFX942-GISEL-STRESS-NEXT: s_nop 7 +; GFX942-GISEL-STRESS-NEXT: s_nop 1 +; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17] +; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16 +; 
GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32 +; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 +; GFX942-GISEL-STRESS-NEXT: s_endpgm bb: %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3) @@ -25,6 +263,5 @@ bb: attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} ; GFX942: {{.*}} -; GISEL: {{.*}} +; GFX942-STRESS: {{.*}} diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll index 5130865..c18c637 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -436,49 +436,47 @@ entry: define void @buildvector_v32i8_with_constant(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a8, i8 %a9, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind { ; CHECK-LABEL: buildvector_v32i8_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: ld.b $t0, $fp, 0 -; CHECK-NEXT: ld.b $t1, $fp, 8 -; CHECK-NEXT: ld.b $t2, $fp, 16 -; CHECK-NEXT: ld.b $t3, $fp, 24 -; CHECK-NEXT: ld.b $t4, $fp, 56 -; CHECK-NEXT: ld.b $t5, $fp, 32 -; CHECK-NEXT: ld.b $t6, $fp, 48 -; CHECK-NEXT: ld.b $t7, $fp, 40 -; CHECK-NEXT: st.b $t4, $sp, 63 -; CHECK-NEXT: st.b $zero, $sp, 61 -; CHECK-NEXT: st.b $t6, $sp, 60 -; CHECK-NEXT: st.b $t7, $sp, 59 -; CHECK-NEXT: st.b $zero, $sp, 56 -; CHECK-NEXT: st.b $t5, $sp, 55 -; CHECK-NEXT: st.b $t3, $sp, 54 -; CHECK-NEXT: st.b $zero, $sp, 53 -; 
CHECK-NEXT: st.b $t2, $sp, 52 -; CHECK-NEXT: st.b $zero, $sp, 51 -; CHECK-NEXT: st.b $t1, $sp, 50 -; CHECK-NEXT: st.b $t0, $sp, 49 -; CHECK-NEXT: st.b $zero, $sp, 48 -; CHECK-NEXT: st.b $a7, $sp, 47 -; CHECK-NEXT: st.h $zero, $sp, 44 -; CHECK-NEXT: st.b $zero, $sp, 42 -; CHECK-NEXT: st.b $a6, $sp, 41 -; CHECK-NEXT: st.b $a5, $sp, 40 -; CHECK-NEXT: st.b $zero, $sp, 39 -; CHECK-NEXT: st.b $a4, $sp, 37 -; CHECK-NEXT: st.h $zero, $sp, 35 -; CHECK-NEXT: st.b $a3, $sp, 34 -; CHECK-NEXT: st.b $a2, $sp, 33 -; CHECK-NEXT: st.b $a1, $sp, 32 -; CHECK-NEXT: xvld $xr0, $sp, 32 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: ld.b $t0, $sp, 56 +; CHECK-NEXT: ld.b $t1, $sp, 48 +; CHECK-NEXT: ld.b $t2, $sp, 40 +; CHECK-NEXT: ld.b $t3, $sp, 32 +; CHECK-NEXT: ld.b $t4, $sp, 24 +; CHECK-NEXT: ld.b $t5, $sp, 16 +; CHECK-NEXT: ld.b $t6, $sp, 8 +; CHECK-NEXT: ld.b $t7, $sp, 0 +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 9 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t7, 1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t6, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t5, 4 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t4, 6 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t3, 7 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t2, 11 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t1, 12 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.b $vr1, $t0, 15 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 @@ -624,32 +622,19 @@ entry: define void @buildvector_v16i16_with_constant(ptr %dst, i16 %a2, i16 %a3, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind { ; CHECK-LABEL: buildvector_v16i16_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: st.h $a7, $sp, 58 -; CHECK-NEXT: st.h $a6, $sp, 56 -; CHECK-NEXT: st.h $a5, $sp, 46 -; CHECK-NEXT: st.h $a4, $sp, 44 -; CHECK-NEXT: st.h $a3, $sp, 42 -; CHECK-NEXT: ori $a3, $zero, 2 -; CHECK-NEXT: st.h $a3, $sp, 40 -; CHECK-NEXT: st.h $a2, $sp, 38 -; CHECK-NEXT: st.h $a1, $sp, 36 -; CHECK-NEXT: lu12i.w $a1, 32 -; CHECK-NEXT: ori $a1, $a1, 2 -; CHECK-NEXT: st.w $a1, $sp, 60 -; CHECK-NEXT: st.w $a1, $sp, 32 -; CHECK-NEXT: lu32i.d $a1, 131074 -; CHECK-NEXT: st.d $a1, $sp, 48 -; CHECK-NEXT: xvld $xr0, $sp, 32 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvrepli.h $xr0, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 7 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 4 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; 
CHECK-NEXT: xvpermi.d $xr1, $xr0, 14 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 5 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <16 x i16> undef, i16 2, i32 0 @@ -724,24 +709,12 @@ entry: define void @buildvector_v8i32_with_constant(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind { ; CHECK-LABEL: buildvector_v8i32_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: st.w $zero, $sp, 60 -; CHECK-NEXT: st.w $a4, $sp, 56 -; CHECK-NEXT: st.w $a3, $sp, 52 -; CHECK-NEXT: st.w $a2, $sp, 48 -; CHECK-NEXT: st.w $zero, $sp, 44 -; CHECK-NEXT: st.w $a1, $sp, 40 -; CHECK-NEXT: st.d $zero, $sp, 32 -; CHECK-NEXT: xvld $xr0, $sp, 32 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 6 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <8 x i32> undef, i32 0, i32 0 @@ -793,21 +766,10 @@ entry: define void @buildvector_v4i64_with_constant(ptr %dst, i64 %a0, i64 %a2) nounwind { ; CHECK-LABEL: buildvector_v4i64_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: st.d $zero, $sp, 56 -; CHECK-NEXT: st.d $a2, $sp, 48 -; CHECK-NEXT: st.d $zero, $sp, 40 -; CHECK-NEXT: st.d $a1, $sp, 32 -; 
CHECK-NEXT: xvld $xr0, $sp, 32 +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2 ; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 ; CHECK-NEXT: ret entry: %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 @@ -880,27 +842,17 @@ entry: define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind { ; CHECK-LABEL: buildvector_v8f32_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: fst.s $fa3, $sp, 60 -; CHECK-NEXT: fst.s $fa2, $sp, 52 -; CHECK-NEXT: fst.s $fa1, $sp, 40 -; CHECK-NEXT: fst.s $fa0, $sp, 36 -; CHECK-NEXT: vldi $vr0, -1280 -; CHECK-NEXT: fst.s $fa0, $sp, 56 +; CHECK-NEXT: # kill: def $f3 killed $f3 def $xr3 +; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2 +; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 ; CHECK-NEXT: lu12i.w $a1, 262144 -; CHECK-NEXT: lu52i.d $a1, $a1, 1024 -; CHECK-NEXT: st.d $a1, $sp, 44 -; CHECK-NEXT: fst.s $fa0, $sp, 32 -; CHECK-NEXT: xvld $xr0, $sp, 32 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1 +; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1 +; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2 +; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5 +; CHECK-NEXT: xvinsve0.w $xr4, $xr3, 7 +; CHECK-NEXT: xvst $xr4, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <8 x float> undef, float 2.0, i32 0 @@ 
-956,21 +908,12 @@ entry: define void @buildvector_v4f64_with_constant(ptr %dst, double %a0, double %a3) nounwind { ; CHECK-LABEL: buildvector_v4f64_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -96 -; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill -; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill -; CHECK-NEXT: addi.d $fp, $sp, 96 -; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0 -; CHECK-NEXT: fst.d $fa1, $sp, 56 -; CHECK-NEXT: vrepli.b $vr1, 0 -; CHECK-NEXT: vst $vr1, $sp, 40 -; CHECK-NEXT: fst.d $fa0, $sp, 32 -; CHECK-NEXT: xvld $xr0, $sp, 32 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $fp, -96 -; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload -; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload -; CHECK-NEXT: addi.d $sp, $sp, 96 +; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1 +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: xvrepli.b $xr2, 0 +; CHECK-NEXT: xvinsve0.d $xr2, $xr0, 0 +; CHECK-NEXT: xvinsve0.d $xr2, $xr1, 3 +; CHECK-NEXT: xvst $xr2, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <4 x double> undef, double %a0, i32 0 diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll index 78588c5..9517558 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -307,23 +307,15 @@ entry: define void @buildvector_v16i8_with_constant(ptr %dst, i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) nounwind { ; CHECK-LABEL: buildvector_v16i8_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: st.b $a7, $sp, 15 -; CHECK-NEXT: st.h $zero, $sp, 13 -; CHECK-NEXT: st.b $a6, $sp, 12 -; CHECK-NEXT: st.b $a5, $sp, 11 -; CHECK-NEXT: st.h $zero, $sp, 9 -; CHECK-NEXT: st.b $a4, $sp, 8 -; CHECK-NEXT: st.b $zero, $sp, 7 -; CHECK-NEXT: st.b $a3, $sp, 6 -; CHECK-NEXT: st.b $zero, $sp, 5 -; CHECK-NEXT: st.b $a2, $sp, 4 -; 
CHECK-NEXT: st.b $zero, $sp, 3 -; CHECK-NEXT: st.h $zero, $sp, 1 -; CHECK-NEXT: st.b $a1, $sp, 0 -; CHECK-NEXT: vld $vr0, $sp, 0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 6 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 11 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 12 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 @@ -398,16 +390,12 @@ entry: define void @buildvector_v8i16_with_constant(ptr %dst, i16 %a0, i16 %a3, i16 %a4, i16 %a5) nounwind { ; CHECK-LABEL: buildvector_v8i16_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: st.h $zero, $sp, 12 -; CHECK-NEXT: st.h $a4, $sp, 10 -; CHECK-NEXT: st.h $a3, $sp, 8 -; CHECK-NEXT: st.h $a2, $sp, 6 -; CHECK-NEXT: st.h $zero, $sp, 2 -; CHECK-NEXT: st.h $a1, $sp, 0 -; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 5 ; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret entry: %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 @@ -459,15 +447,11 @@ entry: define void @buildvector_v4i32_with_constant(ptr %dst, i32 %a0, i32 %a2, i32 %a3) nounwind { ; CHECK-LABEL: buildvector_v4i32_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: st.w $a3, $sp, 12 -; CHECK-NEXT: st.w $a2, $sp, 8 -; CHECK-NEXT: ori $a2, $zero, 2 -; CHECK-NEXT: st.w $a2, $sp, 4 -; CHECK-NEXT: st.w $a1, $sp, 0 -; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vrepli.w $vr0, 2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 2 +; CHECK-NEXT: vinsgr2vr.w 
$vr0, $a3, 3 ; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $sp, 16 ; CHECK-NEXT: ret entry: %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 @@ -508,9 +492,8 @@ entry: define void @buildvector_v2i64_with_constant(ptr %dst, i64 %a1) nounwind { ; CHECK-LABEL: buildvector_v2i64_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsgr2vr.d $vr0, $zero, 0 -; CHECK-NEXT: vinsgr2vr.d $vr1, $a1, 0 -; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -561,15 +544,14 @@ entry: define void @buildvector_v4f32_with_constant(ptr %dst, float %a1, float %a2, float %a3) nounwind { ; CHECK-LABEL: buildvector_v4f32_with_constant: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi.d $sp, $sp, -16 -; CHECK-NEXT: fst.s $fa2, $sp, 12 -; CHECK-NEXT: fst.s $fa1, $sp, 8 -; CHECK-NEXT: fst.s $fa0, $sp, 4 -; CHECK-NEXT: movgr2fr.w $fa0, $zero -; CHECK-NEXT: fst.s $fa0, $sp, 0 -; CHECK-NEXT: vld $vr0, $sp, 0 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2 +; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1 +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 +; CHECK-NEXT: vrepli.b $vr3, 0 +; CHECK-NEXT: vextrins.w $vr3, $vr0, 16 +; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 +; CHECK-NEXT: vextrins.w $vr3, $vr2, 48 +; CHECK-NEXT: vst $vr3, $a0, 0 ; CHECK-NEXT: ret entry: %ins0 = insertelement <4 x float> undef, float 0.0, i32 0 diff --git a/llvm/test/CodeGen/Mips/abiflags-soft-float.ll b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll new file mode 100644 index 0000000..01821f2 --- /dev/null +++ b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll @@ -0,0 +1,12 @@ +; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o %t.o +; RUN: llvm-readobj -A %t.o | FileCheck %s -check-prefix=OBJ +; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | \ +; RUN: FileCheck 
%s -check-prefix=ASM + +; OBJ: FP ABI: Soft float +; ASM: .module softfloat + +define dso_local void @asm_is_null() "use-soft-float"="true" { + call void asm sideeffect "", ""() + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 021c737..fba592d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -634,3 +634,19 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i %deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec) ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results } + +define { <8 x float>, <8 x float> } @deinterleave_unrelated(<16 x float> %arg) { +; CHECK-LABEL: deinterleave_unrelated: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vfabs.v v12, v8 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vnsrl.wx v10, v12, a0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: ret +entry: + %abs = call <16 x float> @llvm.fabs(<16 x float> %arg) + %res = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %abs) + ret { <8 x float>, <8 x float> } %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 0a96e4f..5b1746d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -3744,3 +3744,61 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc %ext = extractvalue {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 
1 x float>} %res, 5 ret <vscale x 1 x float> %ext } + + +define { <8 x float>, <8 x float> } @interleave_deinterleave2(<8 x float> %a, <8 x float> %b) { +; V-LABEL: interleave_deinterleave2: +; V: # %bb.0: # %entry +; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; V-NEXT: vwaddu.vv v12, v8, v10 +; V-NEXT: li a0, -1 +; V-NEXT: vwmaccu.vx v12, a0, v10 +; V-NEXT: li a0, 32 +; V-NEXT: vnsrl.wx v10, v12, a0 +; V-NEXT: vnsrl.wi v8, v12, 0 +; V-NEXT: ret +; +; ZIP-LABEL: interleave_deinterleave2: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: vmv2r.v v12, v10 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: ri.vzip2a.vv v16, v8, v12 +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vnsrl.wx v10, v16, a0 +; ZIP-NEXT: vnsrl.wi v8, v16, 0 +; ZIP-NEXT: ret +entry: + %0 = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b) + %1 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %0) + ret { <8 x float>, <8 x float> } %1 +} + +define <16 x float> @deinterleave_interleave2(<16 x float> %arg) { +; V-LABEL: deinterleave_interleave2: +; V: # %bb.0: # %entry +; V-NEXT: li a0, 32 +; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; V-NEXT: vnsrl.wi v12, v8, 0 +; V-NEXT: vnsrl.wx v14, v8, a0 +; V-NEXT: vwaddu.vv v8, v12, v14 +; V-NEXT: li a0, -1 +; V-NEXT: vwmaccu.vx v8, a0, v14 +; V-NEXT: ret +; +; ZIP-LABEL: deinterleave_interleave2: +; ZIP: # %bb.0: # %entry +; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; ZIP-NEXT: vnsrl.wi v12, v8, 0 +; ZIP-NEXT: li a0, 32 +; ZIP-NEXT: vnsrl.wx v16, v8, a0 +; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; ZIP-NEXT: ri.vzip2a.vv v8, v12, v16 +; ZIP-NEXT: ret +entry: + %0 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %arg) + %a = extractvalue { <8 x float>, <8 x float> } %0, 0 + %b = extractvalue { <8 x float>, <8 x float> } %0, 1 + %res = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b) 
+ ret <16 x float> %res +} diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll new file mode 100644 index 0000000..960bbf5 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -0,0 +1,526 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL +; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL +; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL + +; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included. + +define i1 @isnone_f(float %x) { +; X86-SDAGISEL-LABEL: isnone_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_f: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) + ret i1 %0 +} + +define i1 @isany_f(float %x) { +; X86-SDAGISEL-LABEL: isany_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) + ret i1 %0 +} + +define i1 @issignaling_f(float %x) { +; 
X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: issignaling_f: +; X64: # %bb.0: +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %cl +; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X64-NEXT: setge %al +; X64-NEXT: andb %cl, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: issignaling_f: +; X86-FASTISEL: # %bb.0: +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %cl +; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: andb %cl, %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" + ret i1 %a0 +} + + define i1 @isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: 
cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl + entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" + ret i1 %0 +} + +define i1 @not_isquiet_f(float %x) { +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isquiet_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isquiet_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" + ret i1 %0 +} + +define i1 @isinf_f(float %x) { +; X86-SDAGISEL-LABEL: isinf_f: +; 
X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" + ret i1 %0 +} + +define i1 @not_isinf_f(float %x) { +; X86-SDAGISEL-LABEL: not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isinf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isinf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 
0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" + ret i1 %0 +} + +define i1 @is_plus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" + ret i1 %0 +} + +define i1 @is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: sete %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: 
cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: sete %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" + ret i1 %0 +} + +define i1 @not_is_minus_inf_f(float %x) { +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_is_minus_inf_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-NEXT: setne %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_is_minus_inf_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000 +; X86-FASTISEL-NEXT: setne %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" + ret i1 %0 +} + +define i1 @isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setl %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds 
{{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setl %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" + ret i1 %0 +} + +define i1 @not_isfinite_f(float %x) { +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: not_isfinite_f: +; X64: # %bb.0: # %entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setge %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: not_isfinite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-FASTISEL-NEXT: andl (%esp), %eax +; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setge %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" + ret i1 %0 +} + +define i1 @is_plus_finite_f(float %x) { +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: is_plus_finite_f: +; X64: # %bb.0: # 
%entry +; X64-NEXT: movd %xmm0, %eax +; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-NEXT: setb %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: is_plus_finite_f: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: pushl %eax +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8 +; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstps (%esp) +; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000 +; X86-FASTISEL-NEXT: setb %al +; X86-FASTISEL-NEXT: popl %ecx +; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4 +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" + ret i1 %0 +} + +define i1 @isnone_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isnone_d: +; X64: # %bb.0: # %entry +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) + ret i1 %0 +} + +define i1 @isany_d(double %x) nounwind { +; X86-SDAGISEL-LABEL: isany_d: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-LABEL: isany_d: +; X64: # %bb.0: # %entry +; X64-NEXT: movb $1, %al +; X64-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_d: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +entry: + %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) + ret i1 %0 +} + +define i1 @isnone_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isnone_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: xorl %eax, %eax +; X86-SDAGISEL-NEXT: retl +; +; 
X64-SDAGISEL-LABEL: isnone_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: xorl %eax, %eax +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isnone_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: xorl %eax, %eax +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isnone_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: xorl %eax, %eax +; X64-FASTISEL-NEXT: retq +entry: +%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0) +ret i1 %0 +} + +define i1 @isany_f80(x86_fp80 %x) nounwind { +; X86-SDAGISEL-LABEL: isany_f80: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movb $1, %al +; X86-SDAGISEL-NEXT: retl +; +; X64-SDAGISEL-LABEL: isany_f80: +; X64-SDAGISEL: # %bb.0: # %entry +; X64-SDAGISEL-NEXT: movb $1, %al +; X64-SDAGISEL-NEXT: retq +; +; X86-FASTISEL-LABEL: isany_f80: +; X86-FASTISEL: # %bb.0: # %entry +; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-FASTISEL-NEXT: fstp %st(0) +; X86-FASTISEL-NEXT: movb $1, %al +; X86-FASTISEL-NEXT: retl +; +; X64-FASTISEL-LABEL: isany_f80: +; X64-FASTISEL: # %bb.0: # %entry +; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-FASTISEL-NEXT: fstp %st(0) +; X64-FASTISEL-NEXT: movb $1, %al +; X64-FASTISEL-NEXT: retq +entry: + %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023) + ret i1 %0 +} |