Diffstat (limited to 'llvm/test/CodeGen/ARM')
 -rw-r--r--  llvm/test/CodeGen/ARM/fp-intrinsics.ll            | 169
 -rw-r--r--  llvm/test/CodeGen/ARM/fp16-fullfp16.ll            | 968
 -rw-r--r--  llvm/test/CodeGen/ARM/strict-fp-int-promote.ll    | 159
 -rw-r--r--  llvm/test/CodeGen/ARM/strict-fp-ops.ll             | 202
 -rw-r--r--  llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll | 270
 5 files changed, 1743 insertions(+), 25 deletions(-)
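For reference, the tests added in this diff all follow the same shape: each constrained FP intrinsic takes its operands plus rounding-mode and exception-behavior metadata, and the enclosing function and call site carry the strictfp attribute. A minimal sketch (not part of the diff itself; the function name here is illustrative) of that pattern in LLVM IR:

define float @constrained_fadd_example(float %x, float %y) #0 {
  ; operands first, then the rounding mode and exception behavior as metadata
  %sum = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  ret float %sum
}

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)

attributes #0 = { strictfp }

With "fpexcept.strict" the operation may raise FP exceptions, so it must not be removed when its result is unused or merged by CSE, which is exactly what the new strict-fp-ops.ll tests below verify.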
diff --git a/llvm/test/CodeGen/ARM/fp-intrinsics.ll b/llvm/test/CodeGen/ARM/fp-intrinsics.ll index 93b6a58..cb87508 100644 --- a/llvm/test/CodeGen/ARM/fp-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/fp-intrinsics.ll @@ -76,7 +76,6 @@ define i32 @fptosi_f32(float %x) #0 { ; CHECK-NOSP: bl __aeabi_f2iz ; CHECK-NOSP: bl __aeabi_f2iz ; CHECK-SP: vcvt.s32.f32 -; FIXME-CHECK-SP: vcvt.s32.f32 define void @fptosi_f32_twice(float %arg, ptr %ptr) #0 { entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %arg, metadata !"fpexcept.strict") #0 @@ -146,6 +145,80 @@ define float @tan_f32(float %x) #0 { ret float %val } +; CHECK-LABEL: acos_f32: +; CHECK: bl acosf +define float @acos_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.acos.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: asin_f32: +; CHECK: bl asinf +define float @asin_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.asin.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: atan_f32: +; CHECK: bl atanf +define float @atan_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.atan.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: cosh_f32: +; CHECK: bl coshf +define float @cosh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.cosh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: sinh_f32: +; CHECK: bl sinhf +define float @sinh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.sinh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: tanh_f32: +; CHECK: bl tanhf +define float @tanh_f32(float %x, float %y) #0 { + %val = call float @llvm.experimental.constrained.tanh.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: fmuladd_f32: +; CHECK-SP: vfma.f32 +; CHECK-NOSP: bl __aeabi_fmul +; CHECK-NOSP: bl __aeabi_fadd +define float @fmuladd_f32(float %x, float %y, float %z) #0 { + %val = call float @llvm.experimental.constrained.fmuladd.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: ldexp_f32: +; CHECK: bl ldexpf +define float @ldexp_f32(float %x, i32 %y) #0 { + %val = call float @llvm.experimental.constrained.ldexp.f32.i32(float %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: roundeven_f32: +; CHECK-SP-V8: vrintn.f32 +; CHECK-NOSP: bl roundevenf +define float @roundeven_f32(float %x) #0 { + %val = call float @llvm.experimental.constrained.roundeven.f32(float %x, metadata !"fpexcept.strict") #0 + ret float %val +} + +; CHECK-LABEL: uitofp_f32_i32: +; CHECK-NOSP: bl __aeabi_ui2f +; FIXME-CHECK-SP: vcvt.f32.f64 +define float @uitofp_f32_i32(i32 %x) #0 { + %val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %val +} + ; CHECK-LABEL: atan2_f32: ; CHECK: bl atan2f define float @atan2_f32(float %x, float %y) #0 { @@ -617,6 +690,80 @@ define double @tan_f64(double %x) #0 { ret double %val } +; CHECK-LABEL: acos_f64: +; CHECK: bl acos +define double @acos_f64(double %x, double %y) #0 { + 
%val = call double @llvm.experimental.constrained.acos.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: asin_f64: +; CHECK: bl asin +define double @asin_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.asin.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: atan_f64: +; CHECK: bl atan +define double @atan_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.atan.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: cosh_f64: +; CHECK: bl cosh +define double @cosh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.cosh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: sinh_f64: +; CHECK: bl sinh +define double @sinh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.sinh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: tanh_f64: +; CHECK: bl tanh +define double @tanh_f64(double %x, double %y) #0 { + %val = call double @llvm.experimental.constrained.tanh.f64(double %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: fmuladd_f64: +; CHECK-DP: vfma.f64 +; CHECK-NODP: bl __aeabi_dmul +; CHECK-NODP: bl __aeabi_dadd +define double @fmuladd_f64(double %x, double %y, double %z) #0 { + %val = call double @llvm.experimental.constrained.fmuladd.f64(double %x, double %y, double %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: ldexp_f64: +; CHECK: bl ldexp +define double @ldexp_f64(double %x, i32 %y) #0 { + %val = call double @llvm.experimental.constrained.ldexp.f64.i32(double %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: roundeven_f64: +; CHECK-DP-V8: vrintn.f64 +; CHECK-NODP: bl roundeven +define double @roundeven_f64(double %x) #0 { + %val = call double @llvm.experimental.constrained.roundeven.f64(double %x, metadata !"fpexcept.strict") #0 + ret double %val +} + +; CHECK-LABEL: uitofp_f64_i32: +; CHECK-NOSP: bl __aeabi_ui2d +; FIXME-CHECK-SP: vsub.f64 +define double @uitofp_f64_i32(i32 %x) #0 { + %val = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret double %val +} + ; CHECK-LABEL: atan2_f64: ; CHECK: bl atan2 define double @atan2_f64(double %x, double %y) #0 { @@ -1052,6 +1199,16 @@ declare float @llvm.experimental.constrained.powi.f32(float, i32, metadata, meta declare float @llvm.experimental.constrained.sin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.cos.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata) +declare float 
@llvm.experimental.constrained.fmuladd.f32(float, float, float, metadata, metadata) +declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) +declare float @llvm.experimental.constrained.roundeven.f32(float, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.pow.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.log.f32(float, metadata, metadata) @@ -1087,6 +1244,16 @@ declare double @llvm.experimental.constrained.powi.f64(double, i32, metadata, me declare double @llvm.experimental.constrained.sin.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.cos.f64(double, metadata, metadata) declare double @llvm.experimental.constrained.tan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.acos.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.asin.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.atan.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.cosh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.sinh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.tanh.f64(double, metadata, metadata) +declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) +declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) +declare double @llvm.experimental.constrained.roundeven.f64(double, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) declare double @llvm.experimental.constrained.atan2.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.pow.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.log.f64(double, metadata, metadata) diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 200b14b..b4060d5 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -98,12 +98,18 @@ define i32 @test_fptosi_i32(ptr %p) { ret i32 %r } -; FIXME -;define i64 @test_fptosi_i64(ptr %p) { -; %a = load half, ptr %p, align 2 -; %r = fptosi half %a to i64 -; ret i64 %r -;} +define i64 @test_fptosi_i64(ptr %p) { +; CHECK-LABEL: test_fptosi_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixhfdi +; CHECK-NEXT: pop {r11, pc} + %a = load half, ptr %p, align 2 + %r = fptosi half %a to i64 + ret i64 %r +} define i32 @test_fptoui_i32(ptr %p) { ; CHECK-LABEL: test_fptoui_i32: @@ -116,12 +122,18 @@ define i32 @test_fptoui_i32(ptr %p) { ret i32 %r } -; FIXME -;define i64 @test_fptoui_i64(ptr %p) { -; %a = load half, ptr %p, align 2 -; %r = fptoui half %a to i64 -; ret i64 %r -;} +define i64 @test_fptoui_i64(ptr %p) { +; CHECK-LABEL: test_fptoui_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ldrh r0, [r0] +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixunshfdi +; CHECK-NEXT: pop {r11, pc} + %a = load half, ptr %p, align 2 + %r = fptoui half %a to i64 + ret i64 %r +} define void @test_sitofp_i32(i32 %a, ptr %p) { ; CHECK-LABEL: test_sitofp_i32: @@ -145,19 +157,31 @@ define void @test_uitofp_i32(i32 %a, ptr 
%p) { ret void } -; FIXME -;define void @test_sitofp_i64(i64 %a, ptr %p) { -; %r = sitofp i64 %a to half -; store half %r, ptr %p -; ret void -;} +define void @test_sitofp_i64(i64 %a, ptr %p) { +; CHECK-LABEL: test_sitofp_i64: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: bl __floatdihf +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %r = sitofp i64 %a to half + store half %r, ptr %p + ret void +} -; FIXME -;define void @test_uitofp_i64(i64 %a, ptr %p) { -; %r = uitofp i64 %a to half -; store half %r, ptr %p -; ret void -;} +define void @test_uitofp_i64(i64 %a, ptr %p) { +; CHECK-LABEL: test_uitofp_i64: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: bl __floatundihf +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %r = uitofp i64 %a to half + store half %r, ptr %p + ret void +} define void @test_fptrunc_float(float %f, ptr %p) { ; CHECK-LABEL: test_fptrunc_float: @@ -613,6 +637,902 @@ define void @test_fmuladd(ptr %p, ptr %q, ptr %r) { ret void } +; Half-precision intrinsics + +define half @add_f16(half %x, half %y) #0 { +; CHECK-LABEL: add_f16: +; CHECK: vadd.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fadd.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sub_f16(half %x, half %y) #0 { +; CHECK-LABEL: sub_f16: +; CHECK: vsub.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fsub.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @mul_f16(half %x, half %y) #0 { +; CHECK-LABEL: mul_f16: +; CHECK: vmul.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @div_f16(half %x, half %y) #0 { +; CHECK-LABEL: div_f16: +; CHECK: vdiv.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fdiv.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @frem_f16(half %x, half %y) #0 { +; CHECK-LABEL: frem_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl fmodf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.frem.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @fma_f16(half %x, half %y, half %z) #0 { +; CHECK-LABEL: fma_f16: +; CHECK: vfma.f16 s2, s0, s1 +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fma.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @fmuladd_f16(half %x, half %y, half %z) #0 { +; CHECK-LABEL: fmuladd_f16: +; CHECK: vfma.f16 s2, s0, s1 +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fmuladd.f16(half %x, half %y, half %z, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @fptosi_i32_f16(half %x) #0 { +; CHECK-LABEL: fptosi_i32_f16: +; CHECK: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr + %val = call i32 
@llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i32 @fptoui_i32_f16(half %x) #0 { +; CHECK-LABEL: fptoui_i32_f16: +; CHECK: vcvt.s32.f16 s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @fptosi_i64_f16(half %x) #0 { +; CHECK-LABEL: fptosi_i64_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixhfdi +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define i64 @fptoui_i64_f16(half %x) #0 { +; CHECK-LABEL: fptoui_i64_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vmov.f16 r0, s0 +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: bl __fixunshfdi +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @sitofp_f16_i32(i32 %x) #0 { +; CHECK-LABEL: sitofp_f16_i32: +; CHECK: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: eor r0, r0, #-2147483648 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r0, [sp] +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: vldr d16, .LCPI57_0 +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vsub.f64 d16, d17, d16 +; CHECK-NEXT: vcvtb.f16.f64 s0, d16 +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: .LCPI57_0: +; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .long 1127219200 + %val = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i32(i32 %x) #0 { +; CHECK-LABEL: uitofp_f16_i32: +; CHECK: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: str r0, [sp] +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: vldr d16, .LCPI58_0 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: vldr d17, [sp] +; CHECK-NEXT: vsub.f64 d16, d17, d16 +; CHECK-NEXT: vcvtb.f16.f64 s0, d16 +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: .LCPI58_0: +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 1127219200 + %val = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sitofp_f16_i64(i64 %x) #0 { +; CHECK-LABEL: sitofp_f16_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatdihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i64(i64 %x) #0 { +; CHECK-LABEL: uitofp_f16_i64: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatundihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sitofp_f16_i128(i128 %x) #0 { +; CHECK-LABEL: sitofp_f16_i128: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floattihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sitofp.f16.i128(i128 %x, metadata !"round.tonearest", 
metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @uitofp_f16_i128(i128 %x) #0 { +; CHECK-LABEL: uitofp_f16_i128: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: bl __floatuntihf +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.uitofp.f16.i128(i128 %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sqrt_f16(half %x) #0 { +; CHECK-LABEL: sqrt_f16: +; CHECK: vsqrt.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.sqrt.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @powi_f16(half %x, i32 %y) #0 { +; CHECK-LABEL: powi_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl __powisf2 +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.powi.f16(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sin_f16(half %x) #0 { +; CHECK-LABEL: sin_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @cos_f16(half %x) #0 { +; CHECK-LABEL: cos_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.cos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @tan_f16(half %x) #0 { +; CHECK-LABEL: tan_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.tan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @asin_f16(half %x) #0 { +; CHECK-LABEL: asin_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl asinf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.asin.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @acos_f16(half %x) #0 { +; CHECK-LABEL: acos_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl acosf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.acos.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @atan_f16(half %x) #0 { +; CHECK-LABEL: atan_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl atanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.atan.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @atan2_f16(half %x, half %y) #0 { +; CHECK-LABEL: atan2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push 
{r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl atan2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.atan2.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @sinh_f16(half %x) #0 { +; CHECK-LABEL: sinh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl sinhf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.sinh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @cosh_f16(half %x) #0 { +; CHECK-LABEL: cosh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl coshf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.cosh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @tanh_f16(half %x) #0 { +; CHECK-LABEL: tanh_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanhf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.tanh.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @pow_f16(half %x, half %y) #0 { +; CHECK-LABEL: pow_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: vcvtb.f32.f16 s1, s1 +; CHECK-NEXT: bl powf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.pow.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log_f16(half %x) #0 { +; CHECK-LABEL: log_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl logf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log10_f16(half %x) #0 { +; CHECK-LABEL: log10_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl log10f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log10.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @log2_f16(half %x) #0 { +; CHECK-LABEL: log2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl log2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.log2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @exp_f16(half %x) #0 { +; CHECK-LABEL: exp_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl expf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.exp.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half 
@exp2_f16(half %x) #0 { +; CHECK-LABEL: exp2_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl exp2f +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.exp2.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @rint_f16(half %x) #0 { +; CHECK-LABEL: rint_f16: +; CHECK: vrintx.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.rint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @nearbyint_f16(half %x) #0 { +; CHECK-LABEL: nearbyint_f16: +; CHECK: vrintr.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.nearbyint.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @lrint_f16(half %x) #0 { +; CHECK-LABEL: lrint_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl lrintf +; CHECK-NEXT: pop {r11, pc} + %val = call i32 @llvm.experimental.constrained.lrint.i32.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @llrint_f16(half %x) #0 { +; CHECK-LABEL: llrint_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl llrintf +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.llrint.i64.f16(half %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @maxnum_f16(half %x, half %y) #0 { +; CHECK-LABEL: maxnum_f16: +; CHECK: vmaxnm.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.maxnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @minnum_f16(half %x, half %y) #0 { +; CHECK-LABEL: minnum_f16: +; CHECK: vminnm.f16 s0, s0, s1 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.minnum.f16(half %x, half %y, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @ceil_f16(half %x) #0 { +; CHECK-LABEL: ceil_f16: +; CHECK: vrintp.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.ceil.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @floor_f16(half %x) #0 { +; CHECK-LABEL: floor_f16: +; CHECK: vrintm.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.floor.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @lround_f16(half %x) #0 { +; CHECK-LABEL: lround_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl lroundf +; CHECK-NEXT: pop {r11, pc} + %val = call i32 @llvm.experimental.constrained.lround.i32.f16(half %x, metadata !"fpexcept.strict") #0 + ret i32 %val +} + +define i64 @llround_f16(half %x) #0 { +; CHECK-LABEL: llround_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl llroundf +; CHECK-NEXT: pop {r11, pc} + %val = call i64 @llvm.experimental.constrained.llround.i64.f16(half %x, metadata !"fpexcept.strict") #0 + ret i64 %val +} + +define half @round_f16(half %x) #0 { +; CHECK-LABEL: round_f16: +; CHECK: vrinta.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.round.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} 
+ +define half @roundeven_f16(half %x) #0 { +; CHECK-LABEL: roundeven_f16: +; CHECK: vrintn.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.roundeven.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @trunc_f16(half %x) #0 { +; CHECK-LABEL: trunc_f16: +; CHECK: vrintz.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.trunc.f16(half %x, metadata !"fpexcept.strict") #0 + ret half %val +} + +define half @ldexp_f16(half %x, i32 %y) #0 { +; CHECK-LABEL: ldexp_f16: +; CHECK: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl ldexpf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: pop {r11, pc} + %val = call half @llvm.experimental.constrained.ldexp.f16.i32(half %x, i32 %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define i32 @fcmp_olt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_olt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ole_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ole_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwls r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ogt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ogt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_oge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_oge_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwge r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_oeq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_oeq_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_one_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_one_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ult_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ult_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 
@llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ule_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ule_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwle r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ugt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ugt_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_uge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_uge_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwpl r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_ueq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_ueq_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: movwvs r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmp_une_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmp_une_f16: +; CHECK: vcmp.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_olt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_olt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"olt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ole_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ole_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwls r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ole", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ogt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ogt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ogt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_oge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_oge_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwge r0, #1 +; 
CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_oeq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_oeq_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"oeq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_one_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_one_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwmi r0, #1 +; CHECK-NEXT: movwgt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ult_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ult_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ule_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ule_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwle r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ugt_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ugt_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwhi r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_uge_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_uge_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwpl r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_ueq_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_ueq_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: movwvs r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + +define i32 @fcmps_une_f16(half %a, half %b) #0 { +; CHECK-LABEL: fcmps_une_f16: +; CHECK: vcmpe.f16 s0, s1 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwne r0, #1 +; CHECK-NEXT: bx lr + %cmp = call i1 @llvm.experimental.constrained.fcmps.f16(half %a, half %b, metadata !"une", metadata !"fpexcept.strict") #0 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} + + +; Intrinsics to convert between floating-point types + +define half @fptrunc_f16_f32(float %x) #0 { +; CHECK-LABEL: 
fptrunc_f16_f32: +; CHECK: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: bx lr + %val = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret half %val +} + +define float @fpext_f32_f16(half %x) #0 { +; CHECK-LABEL: fpext_f32_f16: +; CHECK: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bx lr + %val = call float @llvm.experimental.constrained.fpext.f32.f16(half %x, metadata !"fpexcept.strict") #0 + ret float %val +} + + +attributes #0 = { strictfp } + +declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fdiv.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.frem.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata) +declare half @llvm.experimental.constrained.fmuladd.f16(half, half, half, metadata, metadata) +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata) +declare half @llvm.experimental.constrained.sitofp.f16.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.uitofp.f16.i128(i128, metadata, metadata) +declare half @llvm.experimental.constrained.sqrt.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.powi.f16(half, i32, metadata, metadata) +declare half @llvm.experimental.constrained.sin.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.cos.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.tan.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.pow.f16(half, half, metadata, metadata) +declare half @llvm.experimental.constrained.log.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.log10.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.log2.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.exp.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.exp2.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.rint.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.nearbyint.f16(half, metadata, metadata) +declare i32 @llvm.experimental.constrained.lrint.i32.f16(half, metadata, metadata) +declare i64 @llvm.experimental.constrained.llrint.i64.f16(half, metadata, metadata) +declare half @llvm.experimental.constrained.maxnum.f16(half, half, metadata) +declare half @llvm.experimental.constrained.minnum.f16(half, half, metadata) +declare half @llvm.experimental.constrained.ceil.f16(half, metadata) +declare half @llvm.experimental.constrained.floor.f16(half, metadata) +declare i32 @llvm.experimental.constrained.lround.i32.f16(half, metadata) +declare i64 
@llvm.experimental.constrained.llround.i64.f16(half, metadata) +declare half @llvm.experimental.constrained.round.f16(half, metadata) +declare half @llvm.experimental.constrained.roundeven.f16(half, metadata) +declare half @llvm.experimental.constrained.trunc.f16(half, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f16(half, half, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f16(half, half, metadata, metadata) + +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) + + declare half @llvm.sqrt.f16(half %a) declare half @llvm.powi.f16.i32(half %a, i32 %b) declare half @llvm.sin.f16(half %a) diff --git a/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll new file mode 100644 index 0000000..6e5b589 --- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-int-promote.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O0 -o - %s | FileCheck %s +; RUN: llc -mtriple armv7-- -mattr=+vfp4 -O3 -o - %s | FileCheck %s --check-prefix=CHECK-O3 + +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) + +define i32 @test(i32 %a, i16 %b) #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: sxth r0, r1 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: eor r2, r2, #-2147483648 +; CHECK-NEXT: str r2, [sp] +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vldr d17, .LCPI0_0 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-NEXT: str r1, [sp, #12] +; CHECK-NEXT: eor r0, r0, #-2147483648 +; CHECK-NEXT: str r0, [sp, #8] +; CHECK-NEXT: vldr d16, [sp, #8] +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-NEXT: vcmp.f32 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-NEXT: .long 1127219200 +; +; CHECK-O3-LABEL: test: +; CHECK-O3: @ %bb.0: @ %entry +; CHECK-O3-NEXT: sub sp, sp, #16 +; CHECK-O3-NEXT: sxth r1, r1 +; CHECK-O3-NEXT: movw r2, #0 +; CHECK-O3-NEXT: movt r2, #17200 +; CHECK-O3-NEXT: str r2, [sp, #4] +; CHECK-O3-NEXT: eor r0, r0, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp] +; CHECK-O3-NEXT: vldr d16, [sp] +; CHECK-O3-NEXT: vldr d17, .LCPI0_0 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-O3-NEXT: str r2, [sp, #12] +; CHECK-O3-NEXT: eor r0, r1, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp, #8] +; CHECK-O3-NEXT: vldr d16, [sp, #8] +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-O3-NEXT: vcmp.f32 s0, s2 +; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-O3-NEXT: mov r0, #0 +; CHECK-O3-NEXT: movweq r0, #1 +; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: bx lr +; CHECK-O3-NEXT: .p2align 3 +; CHECK-O3-NEXT: @ %bb.1: +; 
CHECK-O3-NEXT: .LCPI0_0: +; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-O3-NEXT: .long 1127219200 +entry: + %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %conv1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata !"fpexcept.strict") #1 + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +define i32 @test2(i32 %a, i16 %b) #0 { +; CHECK-LABEL: test2: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: uxth r0, r1 +; CHECK-NEXT: movw r1, #0 +; CHECK-NEXT: movt r1, #17200 +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: eor r2, r2, #-2147483648 +; CHECK-NEXT: str r2, [sp] +; CHECK-NEXT: vldr d16, [sp] +; CHECK-NEXT: vldr d17, .LCPI1_0 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-NEXT: str r1, [sp, #12] +; CHECK-NEXT: str r0, [sp, #8] +; CHECK-NEXT: vldr d16, [sp, #8] +; CHECK-NEXT: vldr d17, .LCPI1_1 +; CHECK-NEXT: vsub.f64 d16, d16, d17 +; CHECK-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-NEXT: vcmp.f32 s0, s2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movweq r0, #1 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-NEXT: .long 1127219200 +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .long 0 @ double 4503599627370496 +; CHECK-NEXT: .long 1127219200 +; +; CHECK-O3-LABEL: test2: +; CHECK-O3: @ %bb.0: @ %entry +; CHECK-O3-NEXT: sub sp, sp, #16 +; CHECK-O3-NEXT: uxth r1, r1 +; CHECK-O3-NEXT: movw r2, #0 +; CHECK-O3-NEXT: movt r2, #17200 +; CHECK-O3-NEXT: str r2, [sp, #4] +; CHECK-O3-NEXT: eor r0, r0, #-2147483648 +; CHECK-O3-NEXT: str r0, [sp] +; CHECK-O3-NEXT: vldr d16, [sp] +; CHECK-O3-NEXT: vldr d17, .LCPI1_0 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s0, d16 +; CHECK-O3-NEXT: str r2, [sp, #12] +; CHECK-O3-NEXT: str r1, [sp, #8] +; CHECK-O3-NEXT: vldr d16, [sp, #8] +; CHECK-O3-NEXT: vldr d17, .LCPI1_1 +; CHECK-O3-NEXT: vsub.f64 d16, d16, d17 +; CHECK-O3-NEXT: vcvt.f32.f64 s2, d16 +; CHECK-O3-NEXT: vcmp.f32 s0, s2 +; CHECK-O3-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-O3-NEXT: mov r0, #0 +; CHECK-O3-NEXT: movweq r0, #1 +; CHECK-O3-NEXT: add sp, sp, #16 +; CHECK-O3-NEXT: bx lr +; CHECK-O3-NEXT: .p2align 3 +; CHECK-O3-NEXT: @ %bb.1: +; CHECK-O3-NEXT: .LCPI1_0: +; CHECK-O3-NEXT: .long 2147483648 @ double 4503601774854144 +; CHECK-O3-NEXT: .long 1127219200 +; CHECK-O3-NEXT: .LCPI1_1: +; CHECK-O3-NEXT: .long 0 @ double 4503599627370496 +; CHECK-O3-NEXT: .long 1127219200 +entry: + %conv = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %conv1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %b, metadata !"round.tonearest", metadata !"fpexcept.strict") #1 + %cmp = call i1 @llvm.experimental.constrained.fcmp.f32(float %conv, float %conv1, metadata !"oeq", metadata !"fpexcept.strict") #1 + %conv2 = zext i1 %cmp to i32 + ret i32 %conv2 +} + +attributes #0 = { strictfp noinline optnone } +attributes #1 = { strictfp } diff --git a/llvm/test/CodeGen/ARM/strict-fp-ops.ll b/llvm/test/CodeGen/ARM/strict-fp-ops.ll new file mode 100644 index 0000000..608ab07 
--- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-ops.ll @@ -0,0 +1,202 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple armv7-- -mattr=+vfp4 %s -o - | FileCheck %s + + +; Div whose result is unused should be removed unless we have strict exceptions + +define void @unused_div(float %x, float %y) { +; CHECK-LABEL: unused_div: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %add = fdiv float %x, %y + ret void +} + +define void @unused_div_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vdiv.f32 s0, s2, s0 +; CHECK-NEXT: bx lr +entry: + %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret void +} + +define void @unused_div_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: unused_div_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %add = call float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret void +} + + +; Machine CSE should eliminate the second add unless we have strict exceptions + +define float @add_twice(float %x, float %y, i32 %n) { +; CHECK-LABEL: add_twice: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmul.f32 s2, s0, s0 +; CHECK-NEXT: vmoveq.f32 s2, s0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: bx lr +entry: + %add = fadd float %x, %y + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = fadd float %x, %y + %mul = fmul float %add, %add1 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +define float @add_twice_fpexcept_strict(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s4, r0 +; CHECK-NEXT: vadd.f32 s0, s4, s2 +; CHECK-NEXT: vaddne.f32 s2, s4, s2 +; CHECK-NEXT: vmulne.f32 s0, s0, s2 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label %if.then + +if.then: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +define float @add_twice_round_dynamic(float %x, float %y, i32 %n) #0 { +; CHECK-LABEL: add_twice_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmulne.f32 s0, s0, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %add = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %tobool.not = icmp eq i32 %n, 0 + br i1 %tobool.not, label %if.end, label 
%if.then + +if.then: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + %mul = call float @llvm.experimental.constrained.fmul.f32(float %add, float %add1, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + br label %if.end + +if.end: + %a.0 = phi float [ %mul, %if.then ], [ %add, %entry ] + ret float %a.0 +} + +; Two adds separated by llvm.set.rounding should be preserved when rounding is +; dynamic (as they may give different results) or when we have strict exceptions +; (the llvm.set.rounding is irrelevant, but both could trap). + +define float @set_rounding(float %x, float %y) { +; CHECK-LABEL: set_rounding: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmrs r2, fpscr +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vsub.f32 s0, s0, s0 +; CHECK-NEXT: orr r0, r2, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmrs r1, fpscr +; CHECK-NEXT: bic r1, r1, #12582912 +; CHECK-NEXT: vmsr fpscr, r1 +; CHECK-NEXT: bx lr +entry: + %add1 = fadd float %x, %y + call void @llvm.set.rounding(i32 0) + %add2 = fadd float %x, %y + call void @llvm.set.rounding(i32 1) + %sub = fsub float %add1, %add2 + ret float %sub +} + +define float @set_rounding_fpexcept_strict(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_fpexcept_strict: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vadd.f32 s4, s2, s0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: orr r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: bic r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vsub.f32 s0, s4, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 0) #0 + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + call void @llvm.set.rounding(i32 1) #0 + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 + ret float %sub +} + +define float @set_rounding_round_dynamic(float %x, float %y) #0 { +; CHECK-LABEL: set_rounding_round_dynamic: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vadd.f32 s4, s2, s0 +; CHECK-NEXT: orr r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vmrs r0, fpscr +; CHECK-NEXT: vadd.f32 s0, s2, s0 +; CHECK-NEXT: bic r0, r0, #12582912 +; CHECK-NEXT: vmsr fpscr, r0 +; CHECK-NEXT: vsub.f32 s0, s4, s0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bx lr +entry: + %add1 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 0) #0 + %add2 = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + call void @llvm.set.rounding(i32 1) #0 + %sub = call float @llvm.experimental.constrained.fsub.f32(float %add1, float %add2, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 + ret float %sub +} + +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, 
metadata) +declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) +declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) +declare i32 @llvm.get.rounding() +declare void @llvm.set.rounding(i32) + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll new file mode 100644 index 0000000..5906c79 --- /dev/null +++ b/llvm/test/CodeGen/ARM/strictfp_f16_abi_promote.ll @@ -0,0 +1,270 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=armv7-- < %s | FileCheck -check-prefix=NOFP16 %s + +declare void @f16_user(half) +declare half @f16_result() + +declare void @v2f16_user(<2 x half>) +declare <2 x half> @v2f16_result() + +declare void @v4f16_user(<4 x half>) +declare <4 x half> @v4f16_result() + +declare void @v8f16_user(<8 x half>) +declare <8 x half> @v8f16_result() + +define void @f16_arg(half %arg, ptr %ptr) #0 { +; NOFP16-LABEL: f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, lr} +; NOFP16-NEXT: uxth r0, r0 +; NOFP16-NEXT: mov r4, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4] +; NOFP16-NEXT: pop {r4, pc} + %fpext = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict") + store float %fpext, ptr %ptr + ret void +} + +define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v2f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r11, lr} +; NOFP16-NEXT: vpush {d8} +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: uxth r0, r1 +; NOFP16-NEXT: mov r4, r2 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: uxth r1, r5 +; NOFP16-NEXT: vmov s17, r0 +; NOFP16-NEXT: mov r0, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: vmov s16, r0 +; NOFP16-NEXT: vstr d8, [r4] +; NOFP16-NEXT: vpop {d8} +; NOFP16-NEXT: pop {r4, r5, r11, pc} + %fpext = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict") + store <2 x float> %fpext, ptr %ptr + ret void +} + +define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v3f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, lr} +; NOFP16-NEXT: vpush {d8} +; NOFP16-NEXT: mov r6, r0 +; NOFP16-NEXT: uxth r0, r1 +; NOFP16-NEXT: mov r4, r3 +; NOFP16-NEXT: mov r5, r2 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: uxth r1, r6 +; NOFP16-NEXT: vmov s17, r0 +; NOFP16-NEXT: mov r0, r1 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: vmov s16, r0 +; NOFP16-NEXT: uxth r0, r5 +; NOFP16-NEXT: vst1.32 {d8}, [r4:64]! 
+; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: str r0, [r4] +; NOFP16-NEXT: vpop {d8} +; NOFP16-NEXT: pop {r4, r5, r6, pc} + %fpext = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict") + store <3 x float> %fpext, ptr %ptr + ret void +} + +define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 { +; NOFP16-LABEL: v4f16_arg: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; NOFP16-NEXT: vpush {d8, d9} +; NOFP16-NEXT: mov r6, r0 +; NOFP16-NEXT: uxth r0, r1 +; NOFP16-NEXT: mov r4, r3 +; NOFP16-NEXT: mov r5, r2 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: mov r7, r0 +; NOFP16-NEXT: uxth r0, r4 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: vmov s19, r0 +; NOFP16-NEXT: uxth r0, r5 +; NOFP16-NEXT: ldr r4, [sp, #40] +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: vmov s18, r0 +; NOFP16-NEXT: uxth r0, r6 +; NOFP16-NEXT: vmov s17, r7 +; NOFP16-NEXT: bl __gnu_h2f_ieee +; NOFP16-NEXT: vmov s16, r0 +; NOFP16-NEXT: vst1.64 {d8, d9}, [r4] +; NOFP16-NEXT: vpop {d8, d9} +; NOFP16-NEXT: pop {r4, r5, r6, r7, r11, pc} + %fpext = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half> %arg, metadata !"fpexcept.strict") + store <4 x float> %fpext, ptr %ptr + ret void +} + + define half @f16_return(float %arg) #0 { +; NOFP16-LABEL: f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r11, lr} +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: pop {r11, pc} + %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret half %fptrunc + } + + define <2 x half> @v2f16_return(<2 x float> %arg) #0 { +; NOFP16-LABEL: v2f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r11, lr} +; NOFP16-NEXT: vpush {d8} +; NOFP16-NEXT: sub sp, sp, #8 +; NOFP16-NEXT: vmov d8, r0, r1 +; NOFP16-NEXT: vmov r0, s17 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: vmov r1, s16 +; NOFP16-NEXT: strh r0, [sp, #6] +; NOFP16-NEXT: mov r0, r1 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: strh r0, [sp, #4] +; NOFP16-NEXT: add r0, sp, #4 +; NOFP16-NEXT: vld1.32 {d16[0]}, [r0:32] +; NOFP16-NEXT: vmovl.u16 q8, d16 +; NOFP16-NEXT: vmov.32 r0, d16[0] +; NOFP16-NEXT: vmov.32 r1, d16[1] +; NOFP16-NEXT: add sp, sp, #8 +; NOFP16-NEXT: vpop {d8} +; NOFP16-NEXT: pop {r11, pc} + %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <2 x half> %fptrunc + } + + define <3 x half> @v3f16_return(<3 x float> %arg) #0 { +; NOFP16-LABEL: v3f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r5, r6, lr} +; NOFP16-NEXT: vmov d1, r2, r3 +; NOFP16-NEXT: mov r5, r0 +; NOFP16-NEXT: vmov d0, r0, r1 +; NOFP16-NEXT: mov r4, r1 +; NOFP16-NEXT: vmov r0, s2 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: uxth r6, r0 +; NOFP16-NEXT: mov r0, r4 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov r0, r5 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: pkhbt r0, r0, r4, lsl #16 +; NOFP16-NEXT: vmov d16, r0, r6 +; NOFP16-NEXT: vmov.u16 r0, d16[0] +; NOFP16-NEXT: vmov.u16 r1, d16[1] +; NOFP16-NEXT: vmov.u16 r2, d16[2] +; NOFP16-NEXT: pop {r4, r5, r6, pc} + %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <3 x half> %fptrunc + } + + define <4 x half> @v4f16_return(<4 x float> %arg) #0 { +; NOFP16-LABEL: v4f16_return: +; NOFP16: @ %bb.0: 
+; NOFP16-NEXT: push {r4, r5, r11, lr} +; NOFP16-NEXT: vpush {d8, d9} +; NOFP16-NEXT: vmov d8, r2, r3 +; NOFP16-NEXT: vmov d9, r0, r1 +; NOFP16-NEXT: vmov r2, s17 +; NOFP16-NEXT: mov r0, r2 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: vmov r0, s16 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: vmov r1, s19 +; NOFP16-NEXT: pkhbt r5, r0, r4, lsl #16 +; NOFP16-NEXT: mov r0, r1 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: vmov r0, s18 +; NOFP16-NEXT: bl __gnu_f2h_ieee +; NOFP16-NEXT: pkhbt r0, r0, r4, lsl #16 +; NOFP16-NEXT: vmov d16, r0, r5 +; NOFP16-NEXT: vmov.u16 r0, d16[0] +; NOFP16-NEXT: vmov.u16 r1, d16[1] +; NOFP16-NEXT: vmov.u16 r2, d16[2] +; NOFP16-NEXT: vmov.u16 r3, d16[3] +; NOFP16-NEXT: vpop {d8, d9} +; NOFP16-NEXT: pop {r4, r5, r11, pc} + %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict") + ret <4 x half> %fptrunc + } + +define void @outgoing_v4f16_return(ptr %ptr) #0 { +; NOFP16-LABEL: outgoing_v4f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, lr} +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: bl v4f16_result +; NOFP16-NEXT: strh r3, [r4, #6] +; NOFP16-NEXT: strh r2, [r4, #4] +; NOFP16-NEXT: strh r1, [r4, #2] +; NOFP16-NEXT: strh r0, [r4] +; NOFP16-NEXT: pop {r4, pc} + %val = call <4 x half> @v4f16_result() #0 + store <4 x half> %val, ptr %ptr + ret void +} + +define void @outgoing_v8f16_return(ptr %ptr) #0 { +; NOFP16-LABEL: outgoing_v8f16_return: +; NOFP16: @ %bb.0: +; NOFP16-NEXT: push {r4, r10, r11, lr} +; NOFP16-NEXT: add r11, sp, #8 +; NOFP16-NEXT: sub sp, sp, #16 +; NOFP16-NEXT: bfc sp, #0, #4 +; NOFP16-NEXT: mov r4, r0 +; NOFP16-NEXT: mov r0, sp +; NOFP16-NEXT: bl v8f16_result +; NOFP16-NEXT: ldm sp, {r0, r1, r2, r3} +; NOFP16-NEXT: stm r4, {r0, r1, r2, r3} +; NOFP16-NEXT: sub sp, r11, #8 +; NOFP16-NEXT: pop {r4, r10, r11, pc} + %val = call <8 x half> @v8f16_result() #0 + store <8 x half> %val, ptr %ptr + ret void +} + +define half @call_split_type_used_outside_block_v8f16() #0 { +; NOFP16-LABEL: call_split_type_used_outside_block_v8f16: +; NOFP16: @ %bb.0: @ %bb0 +; NOFP16-NEXT: push {r4, r10, r11, lr} +; NOFP16-NEXT: add r11, sp, #8 +; NOFP16-NEXT: sub sp, sp, #16 +; NOFP16-NEXT: bfc sp, #0, #4 +; NOFP16-NEXT: mov r4, sp +; NOFP16-NEXT: mov r0, r4 +; NOFP16-NEXT: bl v8f16_result +; NOFP16-NEXT: vld1.32 {d16[0]}, [r4:32] +; NOFP16-NEXT: vmov.u16 r0, d16[0] +; NOFP16-NEXT: sub sp, r11, #8 +; NOFP16-NEXT: pop {r4, r10, r11, pc} +bb0: + %split.ret.type = call <8 x half> @v8f16_result() #0 + br label %bb1 + +bb1: + %extract = extractelement <8 x half> %split.ret.type, i32 0 + ret half %extract +} + +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0 +declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0 +declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0 +declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0 + +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0 +declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0 +declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0 +declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0 + +attributes #0 = { strictfp } |