aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll249
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/build-vector.ll215
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/build-vector.ll74
-rw-r--r--llvm/test/CodeGen/Mips/abiflags-soft-float.ll12
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll16
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll58
-rw-r--r--llvm/test/CodeGen/X86/isel-fpclass.ll526
7 files changed, 962 insertions, 188 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
index fb1e46d..ea9334a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
@@ -1,13 +1,100 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-SDAG-STRESS %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942-STRESS,GFX942-GISEL-STRESS %s
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
+; GFX942-SDAG-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 1.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 2.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[4:5], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-NEXT: s_nop 6
+; GFX942-SDAG-NEXT: global_store_dwordx4 v2, a[0:3], s[6:7]
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX942-GISEL-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-GISEL: ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
+; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x40400000
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-GISEL-NEXT: s_mov_b32 s5, 4.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-NEXT: s_nop 5
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GFX942-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-SDAG-STRESS: ; %bb.0: ; %bb
+; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0
+; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v4, 0
+; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-STRESS-NEXT: s_nop 1
+; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-STRESS-NEXT: s_nop 6
+; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v4, a[0:3], s[6:7]
+; GFX942-SDAG-STRESS-NEXT: s_endpgm
+;
+; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-GISEL-STRESS: ; %bb.0: ; %bb
+; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s2, 0x40400000
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s3, 4.0
+; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
+; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-STRESS-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-STRESS-NEXT: s_nop 1
+; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-STRESS-NEXT: s_nop 5
+; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GFX942-GISEL-STRESS-NEXT: s_endpgm
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -16,6 +103,157 @@ bb:
}
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
+; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 1.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 2.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: s_nop 7
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX942-GISEL-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-GISEL: ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-GISEL-NEXT: s_mov_b32 s18, 1.0
+; GFX942-GISEL-NEXT: s_mov_b32 s19, 2.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX942-GISEL-NEXT: s_mov_b32 s18, 0x40400000
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-GISEL-NEXT: s_mov_b32 s19, 4.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19]
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-NEXT: s_nop 7
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-GISEL-NEXT: s_endpgm
+;
+; GFX942-SDAG-STRESS-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-SDAG-STRESS: ; %bb.0: ; %bb
+; GFX942-SDAG-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 1.0
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v1, 2.0
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v2, 0x40400000
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v3, 4.0
+; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-SDAG-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-SDAG-STRESS-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-SDAG-STRESS-NEXT: s_nop 1
+; GFX942-SDAG-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-STRESS-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-STRESS-NEXT: s_nop 7
+; GFX942-SDAG-STRESS-NEXT: s_nop 1
+; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-SDAG-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-SDAG-STRESS-NEXT: s_endpgm
+;
+; GFX942-GISEL-STRESS-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-GISEL-STRESS: ; %bb.0: ; %bb
+; GFX942-GISEL-STRESS-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-STRESS-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-GISEL-STRESS-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-GISEL-STRESS-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 1.0
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 2.0
+; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s0, 0x40400000
+; GFX942-GISEL-STRESS-NEXT: s_mov_b32 s1, 4.0
+; GFX942-GISEL-STRESS-NEXT: v_mov_b64_e32 v[2:3], s[0:1]
+; GFX942-GISEL-STRESS-NEXT: s_nop 1
+; GFX942-GISEL-STRESS-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-STRESS-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-STRESS-NEXT: s_nop 7
+; GFX942-GISEL-STRESS-NEXT: s_nop 1
+; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-GISEL-STRESS-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-GISEL-STRESS-NEXT: s_endpgm
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -25,6 +263,5 @@ bb:
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX942: {{.*}}
-; GISEL: {{.*}}
+; GFX942-STRESS: {{.*}}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 5130865..c18c637 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -436,49 +436,47 @@ entry:
define void @buildvector_v32i8_with_constant(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a8, i8 %a9, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind {
; CHECK-LABEL: buildvector_v32i8_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: ld.b $t0, $fp, 0
-; CHECK-NEXT: ld.b $t1, $fp, 8
-; CHECK-NEXT: ld.b $t2, $fp, 16
-; CHECK-NEXT: ld.b $t3, $fp, 24
-; CHECK-NEXT: ld.b $t4, $fp, 56
-; CHECK-NEXT: ld.b $t5, $fp, 32
-; CHECK-NEXT: ld.b $t6, $fp, 48
-; CHECK-NEXT: ld.b $t7, $fp, 40
-; CHECK-NEXT: st.b $t4, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 61
-; CHECK-NEXT: st.b $t6, $sp, 60
-; CHECK-NEXT: st.b $t7, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.b $t5, $sp, 55
-; CHECK-NEXT: st.b $t3, $sp, 54
-; CHECK-NEXT: st.b $zero, $sp, 53
-; CHECK-NEXT: st.b $t2, $sp, 52
-; CHECK-NEXT: st.b $zero, $sp, 51
-; CHECK-NEXT: st.b $t1, $sp, 50
-; CHECK-NEXT: st.b $t0, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $a7, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.b $a6, $sp, 41
-; CHECK-NEXT: st.b $a5, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.b $a4, $sp, 37
-; CHECK-NEXT: st.h $zero, $sp, 35
-; CHECK-NEXT: st.b $a3, $sp, 34
-; CHECK-NEXT: st.b $a2, $sp, 33
-; CHECK-NEXT: st.b $a1, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ld.b $t0, $sp, 56
+; CHECK-NEXT: ld.b $t1, $sp, 48
+; CHECK-NEXT: ld.b $t2, $sp, 40
+; CHECK-NEXT: ld.b $t3, $sp, 32
+; CHECK-NEXT: ld.b $t4, $sp, 24
+; CHECK-NEXT: ld.b $t5, $sp, 16
+; CHECK-NEXT: ld.b $t6, $sp, 8
+; CHECK-NEXT: ld.b $t7, $sp, 0
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 5
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 9
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t7, 1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t6, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t5, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t4, 6
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t3, 7
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t2, 11
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t1, 12
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t0, 15
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0
@@ -624,32 +622,19 @@ entry:
define void @buildvector_v16i16_with_constant(ptr %dst, i16 %a2, i16 %a3, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind {
; CHECK-LABEL: buildvector_v16i16_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.h $a7, $sp, 58
-; CHECK-NEXT: st.h $a6, $sp, 56
-; CHECK-NEXT: st.h $a5, $sp, 46
-; CHECK-NEXT: st.h $a4, $sp, 44
-; CHECK-NEXT: st.h $a3, $sp, 42
-; CHECK-NEXT: ori $a3, $zero, 2
-; CHECK-NEXT: st.h $a3, $sp, 40
-; CHECK-NEXT: st.h $a2, $sp, 38
-; CHECK-NEXT: st.h $a1, $sp, 36
-; CHECK-NEXT: lu12i.w $a1, 32
-; CHECK-NEXT: ori $a1, $a1, 2
-; CHECK-NEXT: st.w $a1, $sp, 60
-; CHECK-NEXT: st.w $a1, $sp, 32
-; CHECK-NEXT: lu32i.d $a1, 131074
-; CHECK-NEXT: st.d $a1, $sp, 48
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvrepli.h $xr0, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 7
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 5
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <16 x i16> undef, i16 2, i32 0
@@ -724,24 +709,12 @@ entry:
define void @buildvector_v8i32_with_constant(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind {
; CHECK-LABEL: buildvector_v8i32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.w $a4, $sp, 56
-; CHECK-NEXT: st.w $a3, $sp, 52
-; CHECK-NEXT: st.w $a2, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: st.w $a1, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 4
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 5
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 6
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x i32> undef, i32 0, i32 0
@@ -793,21 +766,10 @@ entry:
define void @buildvector_v4i64_with_constant(ptr %dst, i64 %a0, i64 %a2) nounwind {
; CHECK-LABEL: buildvector_v4i64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: st.d $a2, $sp, 48
-; CHECK-NEXT: st.d $zero, $sp, 40
-; CHECK-NEXT: st.d $a1, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0
@@ -880,27 +842,17 @@ entry:
define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind {
; CHECK-LABEL: buildvector_v8f32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: fst.s $fa3, $sp, 60
-; CHECK-NEXT: fst.s $fa2, $sp, 52
-; CHECK-NEXT: fst.s $fa1, $sp, 40
-; CHECK-NEXT: fst.s $fa0, $sp, 36
-; CHECK-NEXT: vldi $vr0, -1280
-; CHECK-NEXT: fst.s $fa0, $sp, 56
+; CHECK-NEXT: # kill: def $f3 killed $f3 def $xr3
+; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
; CHECK-NEXT: lu12i.w $a1, 262144
-; CHECK-NEXT: lu52i.d $a1, $a1, 1024
-; CHECK-NEXT: st.d $a1, $sp, 44
-; CHECK-NEXT: fst.s $fa0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
+; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
+; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5
+; CHECK-NEXT: xvinsve0.w $xr4, $xr3, 7
+; CHECK-NEXT: xvst $xr4, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x float> undef, float 2.0, i32 0
@@ -956,21 +908,12 @@ entry:
define void @buildvector_v4f64_with_constant(ptr %dst, double %a0, double %a3) nounwind {
; CHECK-LABEL: buildvector_v4f64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: fst.d $fa1, $sp, 56
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vst $vr1, $sp, 40
-; CHECK-NEXT: fst.d $fa0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: xvrepli.b $xr2, 0
+; CHECK-NEXT: xvinsve0.d $xr2, $xr0, 0
+; CHECK-NEXT: xvinsve0.d $xr2, $xr1, 3
+; CHECK-NEXT: xvst $xr2, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x double> undef, double %a0, i32 0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index 78588c5..9517558 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -307,23 +307,15 @@ entry:
define void @buildvector_v16i8_with_constant(ptr %dst, i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) nounwind {
; CHECK-LABEL: buildvector_v16i8_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.b $a7, $sp, 15
-; CHECK-NEXT: st.h $zero, $sp, 13
-; CHECK-NEXT: st.b $a6, $sp, 12
-; CHECK-NEXT: st.b $a5, $sp, 11
-; CHECK-NEXT: st.h $zero, $sp, 9
-; CHECK-NEXT: st.b $a4, $sp, 8
-; CHECK-NEXT: st.b $zero, $sp, 7
-; CHECK-NEXT: st.b $a3, $sp, 6
-; CHECK-NEXT: st.b $zero, $sp, 5
-; CHECK-NEXT: st.b $a2, $sp, 4
-; CHECK-NEXT: st.b $zero, $sp, 3
-; CHECK-NEXT: st.h $zero, $sp, 1
-; CHECK-NEXT: st.b $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 4
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 6
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 11
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 12
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
@@ -398,16 +390,12 @@ entry:
define void @buildvector_v8i16_with_constant(ptr %dst, i16 %a0, i16 %a3, i16 %a4, i16 %a5) nounwind {
; CHECK-LABEL: buildvector_v8i16_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.h $zero, $sp, 12
-; CHECK-NEXT: st.h $a4, $sp, 10
-; CHECK-NEXT: st.h $a3, $sp, 8
-; CHECK-NEXT: st.h $a2, $sp, 6
-; CHECK-NEXT: st.h $zero, $sp, 2
-; CHECK-NEXT: st.h $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 4
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 5
; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
@@ -459,15 +447,11 @@ entry:
define void @buildvector_v4i32_with_constant(ptr %dst, i32 %a0, i32 %a2, i32 %a3) nounwind {
; CHECK-LABEL: buildvector_v4i32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.w $a3, $sp, 12
-; CHECK-NEXT: st.w $a2, $sp, 8
-; CHECK-NEXT: ori $a2, $zero, 2
-; CHECK-NEXT: st.w $a2, $sp, 4
-; CHECK-NEXT: st.w $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vrepli.w $vr0, 2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 3
; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
@@ -508,9 +492,8 @@ entry:
define void @buildvector_v2i64_with_constant(ptr %dst, i64 %a1) nounwind {
; CHECK-LABEL: buildvector_v2i64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.d $vr0, $zero, 0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a1, 0
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -561,15 +544,14 @@ entry:
define void @buildvector_v4f32_with_constant(ptr %dst, float %a1, float %a2, float %a3) nounwind {
; CHECK-LABEL: buildvector_v4f32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: fst.s $fa2, $sp, 12
-; CHECK-NEXT: fst.s $fa1, $sp, 8
-; CHECK-NEXT: fst.s $fa0, $sp, 4
-; CHECK-NEXT: movgr2fr.w $fa0, $zero
-; CHECK-NEXT: fst.s $fa0, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vrepli.b $vr3, 0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 16
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
+; CHECK-NEXT: vst $vr3, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x float> undef, float 0.0, i32 0
diff --git a/llvm/test/CodeGen/Mips/abiflags-soft-float.ll b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll
new file mode 100644
index 0000000..01821f2
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll
@@ -0,0 +1,12 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o tmp.o
+; RUN: llvm-readobj -A tmp.o | FileCheck %s -check-prefix=OBJ
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | \
+; RUN: FileCheck %s -check-prefix=ASM
+
+; OBJ: FP ABI: Soft float
+; ASM: .module softfloat
+
+define dso_local void @asm_is_null() "use-soft-float"="true" {
+ call void asm sideeffect "", ""()
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 021c737..fba592d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -634,3 +634,19 @@ define {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i
%deinterleaved.results = call {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} @llvm.vector.deinterleave4.nxv32i8(<vscale x 32 x i8> %vec)
ret {<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>} %deinterleaved.results
}
+
+define { <8 x float>, <8 x float> } @deinterleave_unrelated(<16 x float> %arg) {
+; CHECK-LABEL: deinterleave_unrelated:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v12, v8
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v12, a0
+; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: ret
+entry:
+ %abs = call <16 x float> @llvm.fabs(<16 x float> %arg)
+ %res = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %abs)
+ ret { <8 x float>, <8 x float> } %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
index 0a96e4f..5b1746d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -3744,3 +3744,61 @@ define <vscale x 1 x float> @vector_deinterleave_nxv1f32_nxv8f32_oneactive2(<vsc
%ext = extractvalue {<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x float>} %res, 5
ret <vscale x 1 x float> %ext
}
+
+
+define { <8 x float>, <8 x float> } @interleave_deinterleave2(<8 x float> %a, <8 x float> %b) {
+; V-LABEL: interleave_deinterleave2:
+; V: # %bb.0: # %entry
+; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; V-NEXT: vwaddu.vv v12, v8, v10
+; V-NEXT: li a0, -1
+; V-NEXT: vwmaccu.vx v12, a0, v10
+; V-NEXT: li a0, 32
+; V-NEXT: vnsrl.wx v10, v12, a0
+; V-NEXT: vnsrl.wi v8, v12, 0
+; V-NEXT: ret
+;
+; ZIP-LABEL: interleave_deinterleave2:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZIP-NEXT: vmv2r.v v12, v10
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: ri.vzip2a.vv v16, v8, v12
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vnsrl.wx v10, v16, a0
+; ZIP-NEXT: vnsrl.wi v8, v16, 0
+; ZIP-NEXT: ret
+entry:
+ %0 = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b)
+ %1 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %0)
+ ret { <8 x float>, <8 x float> } %1
+}
+
+define <16 x float> @deinterleave_interleave2(<16 x float> %arg) {
+; V-LABEL: deinterleave_interleave2:
+; V: # %bb.0: # %entry
+; V-NEXT: li a0, 32
+; V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; V-NEXT: vnsrl.wi v12, v8, 0
+; V-NEXT: vnsrl.wx v14, v8, a0
+; V-NEXT: vwaddu.vv v8, v12, v14
+; V-NEXT: li a0, -1
+; V-NEXT: vwmaccu.vx v8, a0, v14
+; V-NEXT: ret
+;
+; ZIP-LABEL: deinterleave_interleave2:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vnsrl.wi v12, v8, 0
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vnsrl.wx v16, v8, a0
+; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v8, v12, v16
+; ZIP-NEXT: ret
+entry:
+ %0 = call { <8 x float>, <8 x float> } @llvm.vector.deinterleave2.v16f32(<16 x float> %arg)
+ %a = extractvalue { <8 x float>, <8 x float> } %0, 0
+ %b = extractvalue { <8 x float>, <8 x float> } %0, 1
+ %res = call <16 x float> @llvm.vector.interleave2.v16f32(<8 x float> %a, <8 x float> %b)
+ ret <16 x float> %res
+}
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
new file mode 100644
index 0000000..960bbf5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
+; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
+; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
+
+; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+
+define i1 @isnone_f(float %x) {
+; X86-SDAGISEL-LABEL: isnone_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_f(float %x) {
+; X86-SDAGISEL-LABEL: isany_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @issignaling_f(float %x) {
+; X86-SDAGISEL-LABEL: issignaling_f:
+; X86-SDAGISEL: # %bb.0:
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %cl
+; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: andb %cl, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: issignaling_f:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %cl
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: issignaling_f:
+; X86-FASTISEL: # %bb.0:
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %cl
+; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: andb %cl, %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
+ ret i1 %a0
+}
+
+ define i1 @isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
+ ret i1 %0
+}
+
+define i1 @not_isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
+ ret i1 %0
+}
+
+define i1 @isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %0
+}
+
+define i1 @not_isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
+ ret i1 %0
+}
+
+define i1 @is_plus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %0
+}
+
+define i1 @is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %0
+}
+
+define i1 @not_is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
+ ret i1 %0
+}
+
+define i1 @isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
+ ret i1 %0
+}
+
+define i1 @is_plus_finite_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_finite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setb %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_finite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_finite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setb %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
+ ret i1 %0
+}
+
+define i1 @isnone_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @isnone_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isnone_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: xorl %eax, %eax
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isnone_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: xorl %eax, %eax
+; X64-FASTISEL-NEXT: retq
+entry:
+%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0)
+ret i1 %0
+}
+
+define i1 @isany_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isany_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movb $1, %al
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isany_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: movb $1, %al
+; X64-FASTISEL-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023)
+ ret i1 %0
+}