From d075b7bbace8c0ef983ea6f9aad175bec3ede729 Mon Sep 17 00:00:00 2001
From: WÁNG Xuěruì
Date: Thu, 6 Jun 2024 20:49:54 +0800
Subject: [LoongArch] Allow f16 codegen with expansion to libcalls (#94456)

The test case is adapted from llvm/test/CodeGen/RISCV/fp16-promote.ll,
because it covers more IR patterns that ought to be common.

Fixes #93894
---
 .../lib/Target/LoongArch/LoongArchISelLowering.cpp |   8 +
 llvm/test/CodeGen/LoongArch/fp16-promote.ll        | 326 +++++++++++++++++++++
 2 files changed, 334 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/fp16-promote.ll

(limited to 'llvm')
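What "expansion to libcalls" means here, as a minimal C++ sketch rather than
LLVM code: every f16 operation is promoted to f32, performed there, and
rounded back through the compiler-rt helpers __gnu_h2f_ieee and
__gnu_f2h_ieee that appear in the generated code below. half_t is a
hypothetical stand-in for the IEEE binary16 storage type, not something the
patch defines:

    // Sketch only: assumes compiler-rt provides these conversion helpers.
    typedef unsigned short half_t; // hypothetical: raw binary16 bits
    extern "C" float __gnu_h2f_ieee(half_t);  // what ISD::FP16_TO_FP becomes
    extern "C" half_t __gnu_f2h_ieee(float);  // what ISD::FP_TO_FP16 becomes

    half_t fadd_f16(half_t a, half_t b) {
      // 'fadd half %a, %b' after Expand: extend both, add in f32, truncate.
      return __gnu_f2h_ieee(__gnu_h2f_ieee(a) + __gnu_h2f_ieee(b));
    }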
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 32e02e3..9d7e463 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -171,6 +171,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
   // Set operations for 'F' feature.
 
   if (Subtarget.hasBasicF()) {
+    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
@@ -186,6 +188,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
     setOperationAction(ISD::FPOW, MVT::f32, Expand);
     setOperationAction(ISD::FREM, MVT::f32, Expand);
+    setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
+    setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
 
     if (Subtarget.is64Bit())
       setOperationAction(ISD::FRINT, MVT::f32, Legal);
@@ -202,7 +206,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
 
   // Set operations for 'D' feature.
 
   if (Subtarget.hasBasicD()) {
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -219,6 +225,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
     setOperationAction(ISD::FPOW, MVT::f64, Expand);
     setOperationAction(ISD::FREM, MVT::f64, Expand);
+    setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+    setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
 
     if (Subtarget.is64Bit())
       setOperationAction(ISD::FRINT, MVT::f64, Legal);
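The two kinds of hooks above cooperate. setLoadExtAction and
setTruncStoreAction tell the legalizer it must not fold the f16 conversion
into a memory access, so an f16 load or store is split into a plain integer
memory operation plus an FP16_TO_FP or FP_TO_FP16 node; setOperationAction
with Expand then turns those nodes into the libcalls. A source-level sketch
of the resulting sequences, reusing the hypothetical half_t declarations
from the sketch above:

    typedef unsigned short half_t; // hypothetical: raw binary16 bits
    extern "C" float __gnu_h2f_ieee(half_t);
    extern "C" half_t __gnu_f2h_ieee(float);

    float load_f16_as_f32(const half_t *p) {
      return __gnu_h2f_ieee(*p); // ld.hu, then the FP16_TO_FP libcall
    }

    void store_f32_as_f16(float f, half_t *p) {
      *p = __gnu_f2h_ieee(f);    // FP_TO_FP16 libcall, then st.h
    }

This is the shape of test_fpextend_float and test_fptrunc_float in the new
test file below.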
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
new file mode 100644
index 0000000..75f920b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+d < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s --check-prefix=LA64
+
+define void @test_load_store(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_load_store:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.h $a0, $a0, 0
+; LA32-NEXT:    st.h $a0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_load_store:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.h $a0, $a0, 0
+; LA64-NEXT:    st.h $a0, $a1, 0
+; LA64-NEXT:    ret
+  %a = load half, ptr %p
+  store half %a, ptr %q
+  ret void
+}
+
+define float @test_fpextend_float(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_float:
+; LA32:       # %bb.0:
+; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    b %plt(__gnu_h2f_ieee)
+;
+; LA64-LABEL: test_fpextend_float:
+; LA64:       # %bb.0:
+; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    b %plt(__gnu_h2f_ieee)
+  %a = load half, ptr %p
+  %r = fpext half %a to float
+  ret float %r
+}
+
+define double @test_fpextend_double(ptr %p) nounwind {
+; LA32-LABEL: test_fpextend_double:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fcvt.d.s $fa0, $fa0
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fpextend_double:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fcvt.d.s $fa0, $fa0
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %a = load half, ptr %p
+  %r = fpext half %a to double
+  ret double %r
+}
+
+define void @test_fptrunc_float(float %f, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_float:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    st.h $a0, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fptrunc_float:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    st.h $a0, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %a = fptrunc float %f to half
+  store half %a, ptr %p
+  ret void
+}
+
+define void @test_fptrunc_double(double %d, ptr %p) nounwind {
+; LA32-LABEL: test_fptrunc_double:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -16
+; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT:    move $fp, $a0
+; LA32-NEXT:    bl %plt(__truncdfhf2)
+; LA32-NEXT:    st.h $a0, $fp, 0
+; LA32-NEXT:    ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 16
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fptrunc_double:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -16
+; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a0
+; LA64-NEXT:    bl %plt(__truncdfhf2)
+; LA64-NEXT:    st.h $a0, $fp, 0
+; LA64-NEXT:    ld.d $fp, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 16
+; LA64-NEXT:    ret
+  %a = fptrunc double %d to half
+  store half %a, ptr %p
+  ret void
+}
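Note that test_fptrunc_double above calls __truncdfhf2 directly instead of
narrowing through float with __gnu_f2h_ieee: rounding twice is not always
the same as rounding once. A standalone sketch of a value that exposes the
difference, assuming a host compiler with _Float16 support (e.g. a recent
clang); the constant is chosen so the intermediate float rounding lands
exactly on a half-precision tie:

    #include <cstdio>

    int main() {
      double d = 1.0 + 0x1p-11 + 0x1p-24; // exact in double
      _Float16 once  = (_Float16)d;        // one rounding:  1 + 2^-10
      _Float16 twice = (_Float16)(float)d; // via float: exactly 1.0
      std::printf("once=%a twice=%a\n", (double)once, (double)twice);
      return 0;
    }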
+
+define half @test_fadd_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fadd_reg:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    fmov.s $fa0, $fa1
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmov.s $fs1, $fa0
+; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fa0, $fs1
+; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fadd_reg:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    fmov.s $fa0, $fa1
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmov.s $fs1, $fa0
+; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fa0, $fs1
+; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+  %r = fadd half %a, %b
+  ret half %r
+}
+
+define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fadd_mem:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a1
+; LA32-NEXT:    move $s0, $a0
+; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fadd_mem:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a1
+; LA64-NEXT:    move $s0, $a0
+; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+  %a = load half, ptr %p
+  %b = load half, ptr %q
+  %r = fadd half %a, %b
+  store half %r, ptr %p
+  ret void
+}
+
+define half @test_fmul_reg(half %a, half %b) nounwind {
+; LA32-LABEL: test_fmul_reg:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA32-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    fmov.s $fa0, $fa1
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmov.s $fs1, $fa0
+; LA32-NEXT:    fmov.s $fa0, $fs0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmul.s $fa0, $fa0, $fs1
+; LA32-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fmul_reg:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    fmov.s $fa0, $fa1
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmov.s $fs1, $fa0
+; LA64-NEXT:    fmov.s $fa0, $fs0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmul.s $fa0, $fa0, $fs1
+; LA64-NEXT:    fld.d $fs1, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+  %r = fmul half %a, %b
+  ret half %r
+}
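In test_fadd_reg and test_fmul_reg above, each half argument arrives already
promoted in an f32 register, and codegen round-trips it through
__gnu_f2h_ieee and then __gnu_h2f_ieee before the single-precision
operation; the round trip rounds the register value to half precision so
the arithmetic sees genuine f16 operands. Roughly, per argument:

    // Hypothetical per-argument canonicalization sketch.
    extern "C" float __gnu_h2f_ieee(unsigned short);
    extern "C" unsigned short __gnu_f2h_ieee(float);

    float canonicalize_half_arg(float promoted) {
      return __gnu_h2f_ieee(__gnu_f2h_ieee(promoted));
    }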
+
+define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
+; LA32-LABEL: test_fmul_mem:
+; LA32:       # %bb.0:
+; LA32-NEXT:    addi.w $sp, $sp, -32
+; LA32-NEXT:    st.w $ra, $sp, 28 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $fp, $sp, 24 # 4-byte Folded Spill
+; LA32-NEXT:    st.w $s0, $sp, 20 # 4-byte Folded Spill
+; LA32-NEXT:    fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT:    move $fp, $a1
+; LA32-NEXT:    move $s0, $a0
+; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmov.s $fs0, $fa0
+; LA32-NEXT:    ld.hu $a0, $fp, 0
+; LA32-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT:    st.h $a0, $s0, 0
+; LA32-NEXT:    fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA32-NEXT:    ld.w $s0, $sp, 20 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $fp, $sp, 24 # 4-byte Folded Reload
+; LA32-NEXT:    ld.w $ra, $sp, 28 # 4-byte Folded Reload
+; LA32-NEXT:    addi.w $sp, $sp, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: test_fmul_mem:
+; LA64:       # %bb.0:
+; LA64-NEXT:    addi.d $sp, $sp, -32
+; LA64-NEXT:    st.d $ra, $sp, 24 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT:    st.d $s0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT:    fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA64-NEXT:    move $fp, $a1
+; LA64-NEXT:    move $s0, $a0
+; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmov.s $fs0, $fa0
+; LA64-NEXT:    ld.hu $a0, $fp, 0
+; LA64-NEXT:    bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT:    fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT:    bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT:    st.h $a0, $s0, 0
+; LA64-NEXT:    fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $s0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $fp, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT:    ld.d $ra, $sp, 24 # 8-byte Folded Reload
+; LA64-NEXT:    addi.d $sp, $sp, 32
+; LA64-NEXT:    ret
+  %a = load half, ptr %p
+  %b = load half, ptr %q
+  %r = fmul half %a, %b
+  store half %r, ptr %p
+  ret void
+}
--
cgit v1.1