diff options
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 50 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll | 533 |
2 files changed, 578 insertions, 5 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 6b6f541..59fe868 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -95,6 +95,9 @@ def riscv_vmset_vl : SDNode<"RISCVISD::VMSET_VL", SDT_RISCVVMSETCLR_VL>; def true_mask : PatLeaf<(riscv_vmset_vl (XLenVT srcvalue))>; +def SplatFPOp : PatFrag<(ops node:$op), + (riscv_vfmv_v_f_vl node:$op, srcvalue)>; + class VPatBinaryVL_VV<SDNode vop, string instruction_name, ValueType result_type, @@ -133,15 +136,48 @@ multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name, } } +class VPatBinaryVL_VF<SDNode vop, + string instruction_name, + ValueType result_type, + ValueType vop_type, + ValueType mask_type, + int sew, + LMULInfo vlmul, + VReg RetClass, + VReg vop_reg_class, + RegisterClass scalar_reg_class> : + Pat<(result_type (vop (vop_type vop_reg_class:$rs1), + (vop_type (SplatFPOp scalar_reg_class:$rs2)), + (mask_type true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(instruction_name#"_"#vlmul.MX) + vop_reg_class:$rs1, + scalar_reg_class:$rs2, + GPR:$vl, sew)>; + multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> { foreach vti = AllFloatVectors in { def : VPatBinaryVL_VV<vop, instruction_name, vti.Vector, vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>; - // FIXME: Support splats. 
+ def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix, + vti.Vector, vti.Vector, vti.Mask, vti.SEW, + vti.LMul, vti.RegClass, vti.RegClass, + vti.ScalarRegClass>; } } +multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> { + foreach fvti = AllFloatVectors in + def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2), + fvti.RegClass:$rs1, + (fvti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) + fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2, + GPR:$vl, fvti.SEW)>; +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -190,10 +226,12 @@ let Predicates = [HasStdExtV, HasStdExtF] in { // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions defm "" : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">; defm "" : VPatBinaryFPVL_VV_VF<riscv_fsub_vl, "PseudoVFSUB">; +defm "" : VPatBinaryFPVL_R_VF<riscv_fsub_vl, "PseudoVFRSUB">; // 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions defm "" : VPatBinaryFPVL_VV_VF<riscv_fmul_vl, "PseudoVFMUL">; defm "" : VPatBinaryFPVL_VV_VF<riscv_fdiv_vl, "PseudoVFDIV">; +defm "" : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">; // 14.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions. foreach vti = AllFloatVectors in { @@ -206,6 +244,16 @@ foreach vti = AllFloatVectors in { (!cast<Instruction>("PseudoVFMADD_VV_"# suffix) vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.SEW)>; + + // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally + // commutable. 
+ def : Pat<(vti.Vector (riscv_fma_vl (SplatFPOp vti.ScalarRegClass:$rs1), + vti.RegClass:$rd, vti.RegClass:$rs2, + (vti.Mask true_mask), + (XLenVT (VLOp GPR:$vl)))), + (!cast<Instruction>("PseudoVFMADD_V" # vti.ScalarSuffix # "_" # suffix) + vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, + GPR:$vl, vti.SEW)>; } // 14.12. Vector Floating-Point Sign-Injection Instructions diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index f559096..7407aa8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 -; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc 
-mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32 +; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+experimental-zfh,+f,+d -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64 define void @fadd_v8f16(<8 x half>* %x, <8 x half>* %y) { ; CHECK-LABEL: fadd_v8f16: @@ -1101,3 +1101,528 @@ define void @fma_v4f64(<4 x double>* %x, <4 x double>* %y, <4 x double>* %z) { ret void } declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) + +define void @fadd_vf_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fadd_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fadd <8 x half> %a, %c + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fadd_vf_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fadd_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = 
insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fadd <4 x float> %a, %c + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fadd_vf_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fadd_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fadd <2 x double> %a, %c + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fadd_fv_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fadd_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fadd <8 x half> %c, %a + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fadd_fv_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fadd_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fadd <4 x float> %c, %a + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fadd_fv_v2f64(<2 x double>* %x, double %y) { +; 
CHECK-LABEL: fadd_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfadd.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fadd <2 x double> %c, %a + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fsub_vf_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fsub_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfsub.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fsub <8 x half> %a, %c + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fsub_vf_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fsub_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfsub.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fsub <4 x float> %a, %c + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fsub_vf_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fsub_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfsub.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x 
double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fsub <2 x double> %a, %c + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fsub_fv_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fsub_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfrsub.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fsub <8 x half> %c, %a + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fsub_fv_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fsub_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfrsub.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fsub <4 x float> %c, %a + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fsub_fv_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fsub_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfrsub.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fsub <2 x double> %c, %a + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fmul_vf_v8f16(<8 x 
half>* %x, half %y) { +; CHECK-LABEL: fmul_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fmul <8 x half> %a, %c + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fmul_vf_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fmul_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fmul <4 x float> %a, %c + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fmul_vf_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fmul_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fmul <2 x double> %a, %c + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fmul_fv_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fmul_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load 
<8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fmul <8 x half> %c, %a + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fmul_fv_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fmul_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fmul <4 x float> %c, %a + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fmul_fv_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fmul_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfmul.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fmul <2 x double> %c, %a + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fdiv_vf_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fdiv_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fdiv <8 x half> %a, %c + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fdiv_vf_v4f32(<4 x 
float>* %x, float %y) { +; CHECK-LABEL: fdiv_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fdiv <4 x float> %a, %c + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fdiv_vf_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fdiv_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fdiv <2 x double> %a, %c + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fdiv_fv_v8f16(<8 x half>* %x, half %y) { +; CHECK-LABEL: fdiv_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 8 +; CHECK-NEXT: vsetvli a1, a1, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vfrdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse16.v v25, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = insertelement <8 x half> undef, half %y, i32 0 + %c = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer + %d = fdiv <8 x half> %c, %a + store <8 x half> %d, <8 x half>* %x + ret void +} + +define void @fdiv_fv_v4f32(<4 x float>* %x, float %y) { +; CHECK-LABEL: fdiv_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 4 +; CHECK-NEXT: vsetvli a1, a1, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vfrdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse32.v v25, (a0) +; CHECK-NEXT: ret + %a = 
load <4 x float>, <4 x float>* %x + %b = insertelement <4 x float> undef, float %y, i32 0 + %c = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer + %d = fdiv <4 x float> %c, %a + store <4 x float> %d, <4 x float>* %x + ret void +} + +define void @fdiv_fv_v2f64(<2 x double>* %x, double %y) { +; CHECK-LABEL: fdiv_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a1, zero, 2 +; CHECK-NEXT: vsetvli a1, a1, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vfrdiv.vf v25, v25, fa0 +; CHECK-NEXT: vse64.v v25, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = insertelement <2 x double> undef, double %y, i32 0 + %c = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer + %d = fdiv <2 x double> %c, %a + store <2 x double> %d, <2 x double>* %x + ret void +} + +define void @fma_vf_v8f16(<8 x half>* %x, <8 x half>* %y, half %z) { +; CHECK-LABEL: fma_vf_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vfmv.v.f v27, fa0 +; CHECK-NEXT: vfmadd.vv v27, v25, v26 +; CHECK-NEXT: vse16.v v27, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x half>* %y + %c = insertelement <8 x half> undef, half %z, i32 0 + %d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer + %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %d, <8 x half> %b) + store <8 x half> %e, <8 x half>* %x + ret void +} + +define void @fma_vf_v4f32(<4 x float>* %x, <4 x float>* %y, float %z) { +; CHECK-LABEL: fma_vf_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vfmv.v.f v27, fa0 +; CHECK-NEXT: vfmadd.vv v27, v25, v26 +; CHECK-NEXT: vse32.v v27, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x 
float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = insertelement <4 x float> undef, float %z, i32 0 + %d = shufflevector <4 x float> %c, <4 x float> undef, <4 x i32> zeroinitializer + %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %d, <4 x float> %b) + store <4 x float> %e, <4 x float>* %x + ret void +} + +define void @fma_vf_v2f64(<2 x double>* %x, <2 x double>* %y, double %z) { +; CHECK-LABEL: fma_vf_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vfmv.v.f v27, fa0 +; CHECK-NEXT: vfmadd.vv v27, v25, v26 +; CHECK-NEXT: vse64.v v27, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = insertelement <2 x double> undef, double %z, i32 0 + %d = shufflevector <2 x double> %c, <2 x double> undef, <2 x i32> zeroinitializer + %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %d, <2 x double> %b) + store <2 x double> %e, <2 x double>* %x + ret void +} + +define void @fma_fv_v8f16(<8 x half>* %x, <8 x half>* %y, half %z) { +; CHECK-LABEL: fma_fv_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 8 +; CHECK-NEXT: vsetvli a2, a2, e16,m1,ta,mu +; CHECK-NEXT: vle16.v v25, (a0) +; CHECK-NEXT: vle16.v v26, (a1) +; CHECK-NEXT: vfmacc.vf v26, fa0, v25 +; CHECK-NEXT: vse16.v v26, (a0) +; CHECK-NEXT: ret + %a = load <8 x half>, <8 x half>* %x + %b = load <8 x half>, <8 x half>* %y + %c = insertelement <8 x half> undef, half %z, i32 0 + %d = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer + %e = call <8 x half> @llvm.fma.v8f16(<8 x half> %d, <8 x half> %a, <8 x half> %b) + store <8 x half> %e, <8 x half>* %x + ret void +} + +define void @fma_fv_v4f32(<4 x float>* %x, <4 x float>* %y, float %z) { +; CHECK-LABEL: fma_fv_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 4 +; CHECK-NEXT: vsetvli a2, a2, 
e32,m1,ta,mu +; CHECK-NEXT: vle32.v v25, (a0) +; CHECK-NEXT: vle32.v v26, (a1) +; CHECK-NEXT: vfmacc.vf v26, fa0, v25 +; CHECK-NEXT: vse32.v v26, (a0) +; CHECK-NEXT: ret + %a = load <4 x float>, <4 x float>* %x + %b = load <4 x float>, <4 x float>* %y + %c = insertelement <4 x float> undef, float %z, i32 0 + %d = shufflevector <4 x float> %c, <4 x float> undef, <4 x i32> zeroinitializer + %e = call <4 x float> @llvm.fma.v4f32(<4 x float> %d, <4 x float> %a, <4 x float> %b) + store <4 x float> %e, <4 x float>* %x + ret void +} + +define void @fma_fv_v2f64(<2 x double>* %x, <2 x double>* %y, double %z) { +; CHECK-LABEL: fma_fv_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a2, zero, 2 +; CHECK-NEXT: vsetvli a2, a2, e64,m1,ta,mu +; CHECK-NEXT: vle64.v v25, (a0) +; CHECK-NEXT: vle64.v v26, (a1) +; CHECK-NEXT: vfmacc.vf v26, fa0, v25 +; CHECK-NEXT: vse64.v v26, (a0) +; CHECK-NEXT: ret + %a = load <2 x double>, <2 x double>* %x + %b = load <2 x double>, <2 x double>* %y + %c = insertelement <2 x double> undef, double %z, i32 0 + %d = shufflevector <2 x double> %c, <2 x double> undef, <2 x i32> zeroinitializer + %e = call <2 x double> @llvm.fma.v2f64(<2 x double> %d, <2 x double> %a, <2 x double> %b) + store <2 x double> %e, <2 x double>* %x + ret void +} |
