From 744c0057e7dc0d1d046a4867cece2f31fee9bb23 Mon Sep 17 00:00:00 2001
From: Nashe Mncube
Date: Thu, 22 Feb 2024 19:15:52 +0000
Subject: [AArch64][CodeGen] Fix crash when fptrunc returns fp16 with +nofp
 attr (#81724)

When lowering the fptrunc opcode returning fp16 with the +nofp flag
enabled, we could trigger a compiler crash because no custom lowering
was implemented. This patch handles the case in which we need to
promote an fp16 return type for fptrunc when the +nofp attr is enabled.
---
 .../AArch64/16bit-float-promotion-with-nofp.ll  |  31 ++++++
 .../CodeGen/AArch64/strictfp_f16_abi_promote.ll | 115 ++++++++++++++++++---
 2 files changed, 129 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
(limited to 'llvm/test')

diff --git a/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
new file mode 100644
index 0000000..bfe9ab8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/16bit-float-promotion-with-nofp.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=-fp-armv8 -o - %s | FileCheck %s
+
+define half @f2h(float %a) {
+; CHECK-LABEL: f2h:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __gnu_f2h_ieee
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to half
+ ret half %0
+}
+
+define bfloat @f2bfloat(float %a) {
+; CHECK-LABEL: f2bfloat:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: bl __truncsfbf2
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %0 = fptrunc float %a to bfloat
+ ret bfloat %0
+}
+
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index a34f7ab..9fa5208 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -131,26 +131,107 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
   ret void
 }
 
-; FIXME:
-; define half @f16_return(float %arg) #0 {
-; %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret half %fptrunc
-; }
+ define half @f16_return(float %arg) #0 {
+; NOFP16-LABEL: f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 16
+; NOFP16-NEXT: .cfi_offset w30, -16
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret half %fptrunc
+ }
 
-; define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
-; %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <2 x half> %fptrunc
-; }
+ define <2 x half> @v2f16_return(<2 x float> %arg) #0 {
+; NOFP16-LABEL: v2f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <2 x half> %fptrunc
+ }
 
-; define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
-; %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <3 x half> %fptrunc
-; }
+ define <3 x half> @v3f16_return(<3 x float> %arg) #0 {
+; NOFP16-LABEL: v3f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 32
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w30, -32
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w2
+; NOFP16-NEXT: mov w19, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w19
+; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: mov w2, w21
+; NOFP16-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <3 x half> %fptrunc
+ }
 
-; define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
-; %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
-; ret <4 x half> %fptrunc
-; }
+ define <4 x half> @v4f16_return(<4 x float> %arg) #0 {
+; NOFP16-LABEL: v4f16_return:
+; NOFP16: // %bb.0:
+; NOFP16-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
+; NOFP16-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; NOFP16-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; NOFP16-NEXT: .cfi_def_cfa_offset 48
+; NOFP16-NEXT: .cfi_offset w19, -8
+; NOFP16-NEXT: .cfi_offset w20, -16
+; NOFP16-NEXT: .cfi_offset w21, -24
+; NOFP16-NEXT: .cfi_offset w22, -32
+; NOFP16-NEXT: .cfi_offset w30, -48
+; NOFP16-NEXT: mov w21, w0
+; NOFP16-NEXT: mov w0, w3
+; NOFP16-NEXT: mov w19, w2
+; NOFP16-NEXT: mov w20, w1
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w22, w0
+; NOFP16-NEXT: mov w0, w19
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w19, w0
+; NOFP16-NEXT: mov w0, w20
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w20, w0
+; NOFP16-NEXT: mov w0, w21
+; NOFP16-NEXT: bl __gnu_f2h_ieee
+; NOFP16-NEXT: mov w1, w20
+; NOFP16-NEXT: mov w2, w19
+; NOFP16-NEXT: mov w3, w22
+; NOFP16-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; NOFP16-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; NOFP16-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
+; NOFP16-NEXT: ret
+ %fptrunc = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float> %arg, metadata !"round.tonearest", metadata !"fpexcept.strict")
+ ret <4 x half> %fptrunc
+ }
 
 ; FIXME:
 ; define void @outgoing_f16_arg(ptr %ptr) #0 {
-- 
cgit v1.1
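
Note: the CodeGen change itself is not part of this view, which is limited
to 'llvm/test'. As a rough sketch only, the fix amounts to registering
promotion for fp16/bf16 fptrunc results when no FP unit is available. The
placement in AArch64ISelLowering.cpp, the choice of Promote, and the exact
op list below are illustrative assumptions, not a copy of the committed
change:

  // Sketch (assumed, not the committed patch): inside the
  // AArch64TargetLowering constructor, mark f16/bf16 FP_ROUND results for
  // promotion when the FP unit is absent (+nofp), so legalization lowers
  // them to the soft-float libcalls the tests above check for
  // (__gnu_f2h_ieee for half, __truncsfbf2 for bfloat).
  if (!Subtarget->hasFPARMv8()) {
    setOperationAction(ISD::FP_ROUND, MVT::f16, Promote);        // fptrunc ... to half
    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Promote); // constrained fptrunc
    setOperationAction(ISD::FP_ROUND, MVT::bf16, Promote);       // fptrunc ... to bfloat
  }

Without some registration of this kind, operation legalization has no rule
for an f16-typed FP_ROUND result on a subtarget with no FP registers, which
is consistent with the crash the commit message describes.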