diff options
author | Yeting Kuo <yeting.kuo@sifive.com> | 2022-12-06 08:39:07 +0800 |
---|---|---|
committer | Yeting Kuo <yeting.kuo@sifive.com> | 2022-12-07 14:15:13 +0800 |
commit | 8c8a6e1488245bfb5709dd645105c7ca47e344e0 (patch) | |
tree | 97452a25c2ac612267ee5b99d894dfa74f05d510 | |
parent | 7b50c183601adf8cacf20f3644cb0f72c7cd1125 (diff) | |
download | llvm-8c8a6e1488245bfb5709dd645105c7ca47e344e0.zip llvm-8c8a6e1488245bfb5709dd645105c7ca47e344e0.tar.gz llvm-8c8a6e1488245bfb5709dd645105c7ca47e344e0.tar.bz2 |
[RISCV] Add basic cost model for vp float rounding instructions.
Reviewed By: craig.topper, reames
Differential Revision: https://reviews.llvm.org/D137766
-rw-r--r-- | llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 25 | ||||
-rw-r--r-- | llvm/test/Analysis/CostModel/RISCV/fround.ll | 300 |
2 files changed, 325 insertions, 0 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ab67435..3b0763f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -678,6 +678,17 @@ static const CostTblEntry VectorIntrinsicCostTable[]{ {Intrinsic::ctpop, MVT::nxv8i64, 21}, }; +static unsigned getISDForVPIntrinsicID(Intrinsic::ID ID) { + switch (ID) { +#define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ + case Intrinsic::VPID: \ + return ISD::VPSD; +#include "llvm/IR/VPIntrinsics.def" +#undef HELPER_MAP_VPID_TO_VPSD + } + return ISD::DELETED_NODE; +} + InstructionCost RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { @@ -736,6 +747,20 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return Cost * LT.first; break; } + case Intrinsic::vp_ceil: + case Intrinsic::vp_floor: + case Intrinsic::vp_round: + case Intrinsic::vp_roundeven: + case Intrinsic::vp_roundtozero: { + // Rounding with static rounding mode needs two more instructions to + // swap/write FRM than vp_rint. + unsigned Cost = 7; + auto LT = getTypeLegalizationCost(RetTy); + unsigned VPISD = getISDForVPIntrinsicID(ICA.getID()); + if (TLI->isOperationCustom(VPISD, LT.second)) + return Cost * LT.first; + break; + } } if (ST->hasVInstructions() && RetTy->isVectorTy()) { diff --git a/llvm/test/Analysis/CostModel/RISCV/fround.ll b/llvm/test/Analysis/CostModel/RISCV/fround.ll index 417de25..37f02eb 100644 --- a/llvm/test/Analysis/CostModel/RISCV/fround.ll +++ b/llvm/test/Analysis/CostModel/RISCV/fround.ll @@ -316,6 +316,211 @@ define void @roundeven() { ret void } +define void @vp_ceil() { +; CHECK-LABEL: 'vp_ceil' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.ceil.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.ceil.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.ceil.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.ceil.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.ceil.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.ceil.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.ceil.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.ceil.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.ceil.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call <2 x float> @llvm.vp.ceil.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.vp.ceil.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.vp.ceil.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.vp.ceil.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x float> @llvm.vp.ceil.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x float> @llvm.vp.ceil.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x float> @llvm.vp.ceil.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x float> @llvm.vp.ceil.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) + call <vscale x 16 x float> @llvm.vp.ceil.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) + call <2 x double> @llvm.vp.ceil.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.vp.ceil.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.vp.ceil.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x double> @llvm.vp.ceil.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x double> @llvm.vp.ceil.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x double> @llvm.vp.ceil.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x double> @llvm.vp.ceil.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + ret void +} + +define void @vp_floor() { +; CHECK-LABEL: 'vp_floor' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.floor.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.floor.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.floor.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.floor.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.floor.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.floor.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.floor.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call <2 x float> @llvm.vp.floor.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.vp.floor.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.vp.floor.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.vp.floor.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x float> @llvm.vp.floor.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x float> @llvm.vp.floor.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x float> @llvm.vp.floor.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x float> @llvm.vp.floor.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) + call <vscale x 16 x float> @llvm.vp.floor.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) + call <2 x double> @llvm.vp.floor.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.vp.floor.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.vp.floor.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.vp.floor.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x double> @llvm.vp.floor.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x double> @llvm.vp.floor.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x double> @llvm.vp.floor.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x double> @llvm.vp.floor.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + ret void +} + +define void @vp_round() { +; CHECK-LABEL: 'vp_round' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.round.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.round.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.round.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.round.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.round.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.round.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.round.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.round.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call <2 x float> @llvm.vp.round.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.vp.round.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.vp.round.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.vp.round.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x float> @llvm.vp.round.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x float> @llvm.vp.round.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x float> @llvm.vp.round.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x float> @llvm.vp.round.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) + call <vscale x 16 x float> @llvm.vp.round.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) + call <2 x double> @llvm.vp.round.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.vp.round.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.vp.round.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.vp.round.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x double> @llvm.vp.round.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x double> @llvm.vp.round.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x double> @llvm.vp.round.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x double> @llvm.vp.round.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + ret void +} + +define void @vp_roundeven() { +; CHECK-LABEL: 'vp_roundeven' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundeven.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundeven.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundeven.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundeven.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundeven.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundeven.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundeven.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundeven.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundeven.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call <2 x float> @llvm.vp.roundeven.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.vp.roundeven.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.vp.roundeven.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.vp.roundeven.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x float> @llvm.vp.roundeven.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x float> @llvm.vp.roundeven.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x float> @llvm.vp.roundeven.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x float> @llvm.vp.roundeven.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) + call <vscale x 16 x float> @llvm.vp.roundeven.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) + call <2 x double> @llvm.vp.roundeven.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.vp.roundeven.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.vp.roundeven.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x double> @llvm.vp.roundeven.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x double> @llvm.vp.roundeven.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x double> @llvm.vp.roundeven.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x double> @llvm.vp.roundeven.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + ret void +} + +define void @vp_roundtozero() { +; CHECK-LABEL: 'vp_roundtozero' +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %2 = call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %3 = call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %4 = call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %5 = call <vscale x 1 x float> @llvm.vp.roundtozero.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %6 = call <vscale x 2 x float> @llvm.vp.roundtozero.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %7 = call <vscale x 4 x float> @llvm.vp.roundtozero.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %8 = call <vscale x 8 x float> @llvm.vp.roundtozero.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %9 = call <vscale x 16 x float> @llvm.vp.roundtozero.nxv16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %10 = call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %11 = call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %12 = call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %13 = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %14 = call <vscale x 1 x double> @llvm.vp.roundtozero.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %15 = call <vscale x 2 x double> @llvm.vp.roundtozero.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %16 = call <vscale x 4 x double> @llvm.vp.roundtozero.nxv4f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %17 = call <vscale x 8 x double> @llvm.vp.roundtozero.nxv8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) + call <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float> undef, <4 x i1> undef, i32 undef) + call <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float> undef, <8 x i1> undef, i32 undef) + call <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x float> @llvm.vp.roundtozero.nvx1f32(<vscale x 1 x float> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x float> @llvm.vp.roundtozero.nvx2f32(<vscale x 2 x float> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x float> @llvm.vp.roundtozero.nvx4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x float> @llvm.vp.roundtozero.nvx8f32(<vscale x 8 x float> undef, <vscale x 8 x i1> undef, i32 undef) + call <vscale x 16 x float> @llvm.vp.roundtozero.nvx16f32(<vscale x 16 x float> undef, <vscale x 16 x i1> undef, i32 undef) + call <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double> undef, <2 x i1> undef, i32 undef) + call <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double> undef, <4 x i1> undef, i32 undef) + call <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double> undef, <8 x i1> undef, i32 undef) + call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> undef, <16 x i1> undef, i32 undef) + call <vscale x 1 x double> @llvm.vp.roundtozero.nvx1f64(<vscale x 1 x double> undef, <vscale x 1 x i1> undef, i32 undef) + call <vscale x 2 x double> @llvm.vp.roundtozero.nvx2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> undef, i32 undef) + call <vscale x 4 x double> @llvm.vp.roundtozero.nvx5f64(<vscale x 4 x double> undef, <vscale x 4 x i1> undef, i32 undef) + call <vscale x 8 x double> @llvm.vp.roundtozero.nvx8f64(<vscale x 8 x double> undef, <vscale x 8 x i1> undef, i32 undef) + ret void +} + define void @vp_rint() { ; CHECK-LABEL: 'vp_rint' ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %1 = call <2 x float> @llvm.vp.rint.v2f32(<2 x float> undef, <2 x i1> undef, i32 undef) @@ -538,6 +743,101 @@ declare <vscale x 2 x double> @llvm.roundeven.nvx2f64(<vscale x 2 x double>) declare <vscale x 4 x double> @llvm.roundeven.nvx5f64(<vscale x 4 x double>) declare <vscale x 8 x double> @llvm.roundeven.nvx8f64(<vscale x 8 x double>) +declare <2 x float> @llvm.vp.ceil.v2f32(<2 x float>, <2 x i1>, i32) +declare <4 x float> @llvm.vp.ceil.v4f32(<4 x float>, <4 x i1>, i32) +declare <8 x float> @llvm.vp.ceil.v8f32(<8 x float>, <8 x i1>, i32) +declare <16 x float> @llvm.vp.ceil.v16f32(<16 x float>, <16 x i1>, i32) +declare <vscale x 1 x float> @llvm.vp.ceil.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x float> @llvm.vp.ceil.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x float> @llvm.vp.ceil.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x float> @llvm.vp.ceil.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) +declare <vscale x 16 x float> @llvm.vp.ceil.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) +declare double @llvm.vp.ceil.f64(double) +declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32) +declare <4 x double> @llvm.vp.ceil.v4f64(<4 x double>, <4 x i1>, i32) +declare <8 x double> @llvm.vp.ceil.v8f64(<8 x double>, <8 x i1>, i32) +declare <16 x double> @llvm.vp.ceil.v16f64(<16 x double>, <16 x i1>, i32) +declare <vscale x 1 x double> @llvm.vp.ceil.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x double> @llvm.vp.ceil.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x double> @llvm.vp.ceil.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x double> @llvm.vp.ceil.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) + +declare <2 x float> @llvm.vp.floor.v2f32(<2 x float>, <2 x i1>, i32) +declare <4 x float> @llvm.vp.floor.v4f32(<4 x float>, <4 x i1>, i32) +declare <8 x float> @llvm.vp.floor.v8f32(<8 x float>, <8 x i1>, i32) +declare <16 x float> @llvm.vp.floor.v16f32(<16 x float>, <16 x i1>, i32) +declare <vscale x 1 x float> @llvm.vp.floor.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x float> @llvm.vp.floor.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x float> @llvm.vp.floor.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x float> @llvm.vp.floor.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) +declare <vscale x 16 x float> @llvm.vp.floor.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) +declare double @llvm.vp.floor.f64(double) +declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32) +declare <4 x double> @llvm.vp.floor.v4f64(<4 x double>, <4 x i1>, i32) +declare <8 x double> @llvm.vp.floor.v8f64(<8 x double>, <8 x i1>, i32) +declare <16 x double> @llvm.vp.floor.v16f64(<16 x double>, <16 x i1>, i32) +declare <vscale x 1 x double> @llvm.vp.floor.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x double> @llvm.vp.floor.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x double> @llvm.vp.floor.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x double> @llvm.vp.floor.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) + +declare <2 x float> @llvm.vp.round.v2f32(<2 x float>, <2 x i1>, i32) +declare <4 x float> @llvm.vp.round.v4f32(<4 x float>, <4 x i1>, i32) +declare <8 x float> @llvm.vp.round.v8f32(<8 x float>, <8 x i1>, i32) +declare <16 x float> @llvm.vp.round.v16f32(<16 x float>, <16 x i1>, i32) +declare <vscale x 1 x float> @llvm.vp.round.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x float> @llvm.vp.round.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x float> @llvm.vp.round.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x float> @llvm.vp.round.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) +declare <vscale x 16 x float> @llvm.vp.round.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) +declare double @llvm.vp.round.f64(double) +declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32) +declare <4 x double> @llvm.vp.round.v4f64(<4 x double>, <4 x i1>, i32) +declare <8 x double> @llvm.vp.round.v8f64(<8 x double>, <8 x i1>, i32) +declare <16 x double> @llvm.vp.round.v16f64(<16 x double>, <16 x i1>, i32) +declare <vscale x 1 x double> @llvm.vp.round.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x double> @llvm.vp.round.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x double> @llvm.vp.round.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x double> @llvm.vp.round.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) + +declare <2 x float> @llvm.vp.roundeven.v2f32(<2 x float>, <2 x i1>, i32) +declare <4 x float> @llvm.vp.roundeven.v4f32(<4 x float>, <4 x i1>, i32) +declare <8 x float> @llvm.vp.roundeven.v8f32(<8 x float>, <8 x i1>, i32) +declare <16 x float> @llvm.vp.roundeven.v16f32(<16 x float>, <16 x i1>, i32) +declare <vscale x 1 x float> @llvm.vp.roundeven.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x float> @llvm.vp.roundeven.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x float> @llvm.vp.roundeven.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x float> @llvm.vp.roundeven.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) +declare <vscale x 16 x float> @llvm.vp.roundeven.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) +declare double @llvm.vp.roundeven.f64(double) +declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32) +declare <4 x double> @llvm.vp.roundeven.v4f64(<4 x double>, <4 x i1>, i32) +declare <8 x double> @llvm.vp.roundeven.v8f64(<8 x double>, <8 x i1>, i32) +declare <16 x double> @llvm.vp.roundeven.v16f64(<16 x double>, <16 x i1>, i32) +declare <vscale x 1 x double> @llvm.vp.roundeven.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x double> @llvm.vp.roundeven.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x double> @llvm.vp.roundeven.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x double> @llvm.vp.roundeven.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) + +declare <2 x float> @llvm.vp.roundtozero.v2f32(<2 x float>, <2 x i1>, i32) +declare <4 x float> @llvm.vp.roundtozero.v4f32(<4 x float>, <4 x i1>, i32) +declare <8 x float> @llvm.vp.roundtozero.v8f32(<8 x float>, <8 x i1>, i32) +declare <16 x float> @llvm.vp.roundtozero.v16f32(<16 x float>, <16 x i1>, i32) +declare <vscale x 1 x float> @llvm.vp.roundtozero.nvx1f32(<vscale x 1 x float>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x float> @llvm.vp.roundtozero.nvx2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x float> @llvm.vp.roundtozero.nvx4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x float> @llvm.vp.roundtozero.nvx8f32(<vscale x 8 x float>, <vscale x 8 x i1>, i32) +declare <vscale x 16 x float> @llvm.vp.roundtozero.nvx16f32(<vscale x 16 x float>, <vscale x 16 x i1>, i32) +declare double @llvm.vp.roundtozero.f64(double) +declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32) +declare <4 x double> @llvm.vp.roundtozero.v4f64(<4 x double>, <4 x i1>, i32) +declare <8 x double> @llvm.vp.roundtozero.v8f64(<8 x double>, <8 x i1>, i32) +declare <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double>, <16 x i1>, i32) +declare <vscale x 1 x double> @llvm.vp.roundtozero.nvx1f64(<vscale x 1 x double>, <vscale x 1 x i1>, i32) +declare <vscale x 2 x double> @llvm.vp.roundtozero.nvx2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32) +declare <vscale x 4 x double> @llvm.vp.roundtozero.nvx5f64(<vscale x 4 x double>, <vscale x 4 x i1>, i32) +declare <vscale x 8 x double> @llvm.vp.roundtozero.nvx8f64(<vscale x 8 x double>, <vscale x 8 x i1>, i32) + declare <2 x float> @llvm.vp.rint.v2f32(<2 x float>, <2 x i1>, i32) declare <4 x float> @llvm.vp.rint.v4f32(<4 x float>, <4 x i1>, i32) declare <8 x float> @llvm.vp.rint.v8f32(<8 x float>, <8 x i1>, i32) |