; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL

define half @test_half(half %x, i32 %exp) nounwind {
; CHECK-LABEL: test_half:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    vcvtph2ps %xmm0, %xmm0
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    retq
entry:
  %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
  ret half %r
}
declare half @llvm.ldexp.f16.i32(half, i32) memory(none)

define float @test_float(float %x, i32 %exp) nounwind {
; CHECK-LABEL: test_float:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    jmp ldexpf@PLT # TAILCALL
entry:
  %r = tail call fast float @ldexpf(float %x, i32 %exp)
  ret float %r
}
declare float @ldexpf(float, i32) memory(none)

define double @test_double(double %x, i32 %exp) nounwind {
; CHECK-LABEL: test_double:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    jmp ldexp@PLT # TAILCALL
entry:
  %r = tail call fast double @ldexp(double %x, i32 %exp)
  ret double %r
}
declare double @ldexp(double, i32) memory(none)

define fp128 @testExpl(fp128 %x, i32 %exp) nounwind {
; CHECK-LABEL: testExpl:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    jmp ldexpl@PLT # TAILCALL
entry:
  %r = tail call fast fp128 @ldexpl(fp128 %x, i32 %exp)
  ret fp128 %r
}
declare fp128 @ldexpl(fp128, i32) memory(none)

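; The vector ldexp intrinsics below have no libcall and are currently
; scalarized: each exponent lane is extracted to %edi, each value lane is
; shuffled into %xmm0, and the scalar libm routine is called once per
; element before the results are reinserted lane by lane.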
define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_4xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $56, %rsp
; CHECK-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm1, %edi
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    addq $56, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
  ret <4 x float> %r
}
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)

define <2 x double> @test_ldexp_2xdouble(<2 x double> %x, <2 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_2xdouble:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $56, %rsp
; CHECK-NEXT:    vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm1, %edi
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    addq $56, %rsp
; CHECK-NEXT:    retq
  %r = call <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double> %x, <2 x i32> %exp)
  ret <2 x double> %r
}
declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>)

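; For 256-bit operands the high half is split off with vextractf128/
; vextracti128, and vzeroupper is emitted before the libcalls.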
define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_8xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $120, %rsp
; CHECK-NEXT:    vmovdqu %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vextracti128 $1, %ymm1, %xmm1
; CHECK-NEXT:    vmovdqa %xmm1, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm1, %edi
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $120, %rsp
; CHECK-NEXT:    retq
  %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
  ret <8 x float> %r
}
declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)

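; <4 x double> follows the same 128-bit split; vunpcklpd rebuilds each pair
; of double results.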
define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_4xdouble:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $88, %rsp
; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vextractps $2, %xmm1, %edi
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    addq $88, %rsp
; CHECK-NEXT:    retq
  %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
  ret <4 x double> %r
}
declare <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>)

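; 512-bit operands are processed as four 128-bit quarters (vextractf32x4/
; vextracti32x4); the final value is reassembled with vinsertf128 and
; vinsertf64x4.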
define <16 x float> @test_ldexp_16xfloat(<16 x float> %x, <16 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_16xfloat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $216, %rsp
; CHECK-NEXT:    vmovdqu64 %zmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vextracti32x4 $3, %zmm1, %xmm1
; CHECK-NEXT:    vmovdqa %xmm1, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm1, %edi
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
; CHECK-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqu64 {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,1,3,3]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,3,3,3]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexpf@PLT
; CHECK-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    vinsertf64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
; CHECK-NEXT:    addq $216, %rsp
; CHECK-NEXT:    retq
  %r = call <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float> %x, <16 x i32> %exp)
  ret <16 x float> %r
}
declare <16 x float> @llvm.ldexp.v16f32.v16i32(<16 x float>, <16 x i32>)

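; <8 x double> uses the same quartering, for eight ldexp calls in total.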
define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwind {
; CHECK-LABEL: test_ldexp_8xdouble:
; CHECK:       # %bb.0:
; CHECK-NEXT:    subq $184, %rsp
; CHECK-NEXT:    vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vmovups %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill
; CHECK-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm1
; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vextractps $2, %xmm1, %edi
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
; CHECK-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $2, %xmm0, %edi
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $3, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vmovapd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vmovd %xmm0, %edi
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT:    vextractps $1, %xmm0, %edi
; CHECK-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,0]
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq ldexp@PLT
; CHECK-NEXT:    vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT:    vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT:    vinsertf64x4 $1, {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 32-byte Folded Reload
; CHECK-NEXT:    addq $184, %rsp
; CHECK-NEXT:    retq
  %r = call <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double> %x, <8 x i32> %exp)
  ret <8 x double> %r
}
declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX512: {{.*}}
; AVX512VL: {{.*}}