//===- RISCVTargetTransformInfo.h - RISC-V specific TTI ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file defines a TargetTransformInfoImplBase conforming object specific
/// to the RISC-V target machine. It uses the target's detailed information to
/// provide more precise answers to certain TTI queries, while letting the
/// target independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H
#define LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H

#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Function.h"
#include <optional>

namespace llvm {

class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
  using BaseT = BasicTTIImplBase<RISCVTTIImpl>;
  using TTI = TargetTransformInfo;

  friend BaseT;

  const RISCVSubtarget *ST;
  const RISCVTargetLowering *TLI;

  const RISCVSubtarget *getST() const { return ST; }
  const RISCVTargetLowering *getTLI() const { return TLI; }

  /// This function returns an estimate for VL to be used in VL-based terms
  /// of the cost model. For fixed length vectors, this is simply the
  /// vector length. For scalable vectors, we return results consistent
  /// with getVScaleForTuning under the assumption that clients are also
  /// using that when comparing costs between scalar and vector
  /// representations. This does unfortunately mean that we can both
  /// undershoot and overshoot the true cost significantly if
  /// getVScaleForTuning is wildly off for the actual target hardware.
  unsigned getEstimatedVLFor(VectorType *Ty) const;

  /// This function calculates the costs for one or more RVV opcodes based
  /// on the vtype and the cost kind.
  /// \param OpCodes A list of opcodes of the RVV instructions to evaluate.
  /// \param VT The MVT of the vtype associated with the RVV instructions.
  /// For widening/narrowing instructions where the result and source types
  /// differ, it is important to check the spec to determine whether the vtype
  /// refers to the result or source type.
  /// \param CostKind The type of cost to compute.
  InstructionCost getRISCVInstructionCost(ArrayRef<unsigned> OpCodes, MVT VT,
                                          TTI::TargetCostKind CostKind) const;

  /// Return the cost of accessing a constant pool entry of the specified
  /// type.
  InstructionCost getConstantPoolLoadCost(Type *Ty,
                                          TTI::TargetCostKind CostKind) const;

  /// If this shuffle can be lowered as a masked slide pair (at worst),
  /// return a cost for it.
  InstructionCost getSlideCost(FixedVectorType *Tp, ArrayRef<int> Mask,
                               TTI::TargetCostKind CostKind) const;

public:
  explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
        TLI(ST->getTargetLowering()) {}
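
  // Illustrative sketch (not part of the upstream interface): how the VL
  // estimate above behaves. Assuming getVScaleForTuning() == 2:
  //   getEstimatedVLFor(<8 x i32>)          -> 8 (fixed: the element count)
  //   getEstimatedVLFor(<vscale x 4 x i32>) -> 4 * 2 = 8
  // If the tuning vscale diverges from the hardware's real vscale, every
  // VL-based term in the cost model scales with that error.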

  /// Return the cost of materializing an immediate for a value operand of
  /// a store instruction.
  InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo,
                                  TTI::TargetCostKind CostKind) const;

  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TTI::TargetCostKind CostKind) const override;
  InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TTI::TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) const override;
  InstructionCost
  getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                      Type *Ty, TTI::TargetCostKind CostKind) const override;

  /// \name EVL Support for predicated vectorization.
  /// Whether the target supports the %evl parameter of VP intrinsics
  /// efficiently in hardware. (see LLVM Language Reference - "Vector
  /// Predication Intrinsics",
  /// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and
  /// "IR-level VP intrinsics",
  /// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics).
  bool hasActiveVectorLength() const override;

  TargetTransformInfo::PopcntSupportKind
  getPopcntSupport(unsigned TyWidth) const override;

  InstructionCost getPartialReductionCost(
      unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
      ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
      TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
      TTI::TargetCostKind CostKind) const override;

  bool shouldExpandReduction(const IntrinsicInst *II) const override;

  bool supportsScalableVectors() const override {
    return ST->hasVInstructions();
  }

  bool enableOrderedReductions() const override { return true; }

  bool enableScalableVectorization() const override {
    return ST->hasVInstructions();
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const override {
    return ST->hasVInstructions() ? TailFoldingStyle::Data
                                  : TailFoldingStyle::DataWithoutLaneMask;
  }
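
  // Illustrative note (an assumption about usage, not upstream text): with
  // TailFoldingStyle::Data the loop vectorizer folds the scalar remainder
  // into the vector loop by predicating each iteration, conceptually:
  //   %mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask(...)
  //   call void @llvm.masked.store.nxv4i32(..., <vscale x 4 x i1> %mask)
  // DataWithoutLaneMask computes an equivalent predicate without the
  // @llvm.get.active.lane.mask intrinsic.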

  std::optional<unsigned> getMaxVScale() const override;
  std::optional<unsigned> getVScaleForTuning() const override;

  TypeSize
  getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override;

  unsigned getRegUsageForType(Type *Ty) const override;

  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;

  bool preferAlternateOpcodeVectorization() const override { return false; }

  bool preferEpilogueVectorization() const override {
    // Epilogue vectorization is usually unprofitable - tail folding or
    // a smaller VF would have been better. This is a blunt hammer - we
    // should re-examine this once vectorization is better tuned.
    return false;
  }

  InstructionCost
  getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                        unsigned AddressSpace,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
                       const TTI::PointersChainInfo &Info, Type *AccessTy,
                       TTI::TargetCostKind CostKind) const override;

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) const override;

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) const override;

  unsigned getMinVectorRegisterBitWidth() const override {
    return ST->useRVVForFixedLengthVectors() ? 16 : 0;
  }

  InstructionCost
  getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
                 ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
                 VectorType *SubTp, ArrayRef<const Value *> Args = {},
                 const Instruction *CxtI = nullptr) const override;

  InstructionCost getScalarizationOverhead(
      VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
      TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
      ArrayRef<Value *> VL = {}) const override;

  InstructionCost
  getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                        TTI::TargetCostKind CostKind) const override;

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor,
      ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind, bool UseMaskForCond = false,
      bool UseMaskForGaps = false) const override;

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) const override;

  InstructionCost
  getExpandCompressMemoryOpCost(unsigned Opcode, Type *Src, bool VariableMask,
                                Align Alignment, TTI::TargetCostKind CostKind,
                                const Instruction *I = nullptr) const override;

  InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I) const override;

  InstructionCost
  getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const override;

  InstructionCost
  getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                   TTI::CastContextHint CCH, TTI::TargetCostKind CostKind,
                   const Instruction *I = nullptr) const override;

  InstructionCost
  getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
                         TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TTI::TargetCostKind CostKind) const override;

  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *ValTy, std::optional<FastMathFlags> FMF,
                           TTI::TargetCostKind CostKind) const override;

  InstructionCost getMemoryOpCost(
      unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo OpdInfo = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost getCmpSelInstrCost(
      unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
      TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      const Instruction *I = nullptr) const override;

  InstructionCost
  getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                 const Instruction *I = nullptr) const override;

  using BaseT::getVectorInstrCost;
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, const Value *Op0,
                                     const Value *Op1) const override;

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Op2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = {},
      const Instruction *CxtI = nullptr) const override;

  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty));
  }
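
  // Illustrative examples for the hook above (assuming a subtarget with the
  // full V extension; exact results depend on the enabled extensions):
  //   isElementTypeLegalForScalableVector(i32) -> true
  //   isElementTypeLegalForScalableVector(f64) -> true
  //   isElementTypeLegalForScalableVector(f16) -> only with a vector
  //     half-precision extension such as Zvfh/Zvfhmin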

  bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const {
    if (!ST->hasVInstructions())
      return false;

    EVT DataTypeVT = TLI->getValueType(DL, DataType);

    // Only support fixed vectors if we know the minimum vector size.
    if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
      return false;

    EVT ElemType = DataTypeVT.getScalarType();
    if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
      return false;

    return TLI->isLegalElementTypeForRVV(ElemType);
  }

  bool isLegalMaskedLoad(Type *DataType, Align Alignment,
                         unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment,
                          unsigned /*AddressSpace*/) const override {
    return isLegalMaskedLoadStore(DataType, Alignment);
  }

  bool isLegalMaskedGatherScatter(Type *DataType, Align Alignment) const {
    if (!ST->hasVInstructions())
      return false;

    EVT DataTypeVT = TLI->getValueType(DL, DataType);

    // Only support fixed vectors if we know the minimum vector size.
    if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
      return false;

    // We also need to check if the vector of addresses is valid.
    EVT PointerTypeVT = EVT(TLI->getPointerTy(DL));
    if (DataTypeVT.isScalableVector() &&
        !TLI->isLegalElementTypeForRVV(PointerTypeVT))
      return false;

    EVT ElemType = DataTypeVT.getScalarType();
    if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
      return false;

    return TLI->isLegalElementTypeForRVV(ElemType);
  }

  bool isLegalMaskedGather(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType, Alignment);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override {
    return isLegalMaskedGatherScatter(DataType, Alignment);
  }

  bool forceScalarizeMaskedGather(VectorType *VTy,
                                  Align Alignment) const override {
    // Scalarize masked gather for RV64 if EEW=64 indices aren't supported.
    return ST->is64Bit() && !ST->hasVInstructionsI64();
  }

  bool forceScalarizeMaskedScatter(VectorType *VTy,
                                   Align Alignment) const override {
    // Scalarize masked scatter for RV64 if EEW=64 indices aren't supported.
    return ST->is64Bit() && !ST->hasVInstructionsI64();
  }

  bool isLegalStridedLoadStore(Type *DataType,
                               Align Alignment) const override {
    EVT DataTypeVT = TLI->getValueType(DL, DataType);
    return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment);
  }

  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
                                    Align Alignment,
                                    unsigned AddrSpace) const override {
    return TLI->isLegalInterleavedAccessType(VTy, Factor, Alignment, AddrSpace,
                                             DL);
  }

  bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override;

  bool isLegalMaskedCompressStore(Type *DataTy,
                                  Align Alignment) const override;

  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return TLI->isVScaleKnownToBeAPowerOfTwo();
  }
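
  // Two illustrative consequences of the legality hooks above (assumptions,
  // not normative):
  //  * A masked load of <4 x i32> with Align(2) is rejected unless the
  //    subtarget enables unaligned vector memory, since the alignment is
  //    below the element's store size.
  //  * On RV64 without 64-bit vector elements (e.g. Zve32x only),
  //    forceScalarizeMaskedGather() returns true: the EEW=64 index vectors
  //    a gather would need are unavailable.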

  /// \returns How the target needs this vector-predicated operation to be
  /// transformed.
  TargetTransformInfo::VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    using VPLegalization = TargetTransformInfo::VPLegalization;
    if (!ST->hasVInstructions() ||
        (PI.getIntrinsicID() == Intrinsic::vp_reduce_mul &&
         cast<VectorType>(PI.getArgOperand(1)->getType())
                 ->getElementType()
                 ->getIntegerBitWidth() != 1))
      return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
    return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
  }
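
  // Illustrative consequence of the strategy above (assumption, not upstream
  // text): an integer @llvm.vp.reduce.mul such as vp.reduce.mul.nxv4i32 is
  // legalized as (Discard the %evl parameter, Convert to unpredicated IR)
  // because RVV has no vector multiply-reduction instruction; the i1 (mask)
  // variant and the remaining VP intrinsics stay Legal once V instructions
  // are available.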

  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    if (!VF.isScalable())
      return true;

    Type *Ty = RdxDesc.getRecurrenceType();
    if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty)))
      return false;

    switch (RdxDesc.getRecurrenceKind()) {
    case RecurKind::Add:
    case RecurKind::And:
    case RecurKind::Or:
    case RecurKind::Xor:
    case RecurKind::SMin:
    case RecurKind::SMax:
    case RecurKind::UMin:
    case RecurKind::UMax:
    case RecurKind::FMin:
    case RecurKind::FMax:
      return true;
    case RecurKind::AnyOf:
    case RecurKind::FAdd:
    case RecurKind::FMulAdd:
      // We can't promote f16/bf16 fadd reductions and scalable vectors can't
      // be expanded.
      if (Ty->isBFloatTy() || (Ty->isHalfTy() && !ST->hasVInstructionsF16()))
        return false;
      return true;
    default:
      return false;
    }
  }

  unsigned getMaxInterleaveFactor(ElementCount VF) const override {
    // Don't interleave if the loop has been vectorized with scalable vectors.
    if (VF.isScalable())
      return 1;
    // If the loop will not be vectorized, don't interleave the loop.
    // Let the regular unroller handle it instead.
    return VF.isScalar() ? 1 : ST->getMaxInterleaveFactor();
  }

  bool enableInterleavedAccessVectorization() const override { return true; }

  unsigned getMinTripCountTailFoldingThreshold() const override;

  enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    switch (ClassID) {
    case RISCVRegisterClass::GPRRC:
      // 31 = 32 GPR - x0 (zero register)
      // FIXME: Should we exclude fixed registers like SP, TP or GP?
      return 31;
    case RISCVRegisterClass::FPRRC:
      if (ST->hasStdExtF())
        return 32;
      return 0;
    case RISCVRegisterClass::VRRC:
      // Although there are 32 vector registers, v0 is special in that it is
      // the only register that can be used to hold a mask.
      // FIXME: Should we conservatively return 31 as the number of usable
      // vector registers?
      return ST->hasVInstructions() ? 32 : 0;
    }
    llvm_unreachable("unknown register class");
  }

  TTI::AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override;

  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    if (Vector)
      return RISCVRegisterClass::VRRC;
    if (!Ty)
      return RISCVRegisterClass::GPRRC;

    Type *ScalarTy = Ty->getScalarType();
    if ((ScalarTy->isHalfTy() && ST->hasStdExtZfhmin()) ||
        (ScalarTy->isFloatTy() && ST->hasStdExtF()) ||
        (ScalarTy->isDoubleTy() && ST->hasStdExtD())) {
      return RISCVRegisterClass::FPRRC;
    }

    return RISCVRegisterClass::GPRRC;
  }

  const char *getRegisterClassName(unsigned ClassID) const override {
    switch (ClassID) {
    case RISCVRegisterClass::GPRRC:
      return "RISCV::GPRRC";
    case RISCVRegisterClass::FPRRC:
      return "RISCV::FPRRC";
    case RISCVRegisterClass::VRRC:
      return "RISCV::VRRC";
    }
    llvm_unreachable("unknown register class");
  }

  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) const override;

  bool shouldConsiderAddressTypePromotion(
      const Instruction &I,
      bool &AllowPromotionWithoutCommonHeader) const override;

  std::optional<unsigned> getMinPageSize() const override { return 4096; }

  /// Return true if the (vector) instruction I will be lowered to an
  /// instruction with a scalar splat operand for the given Operand number.
  bool canSplatOperand(Instruction *I, int Operand) const;

  /// Return true if a vector instruction will lower to a target instruction
  /// able to splat the given operand.
  bool canSplatOperand(unsigned Opcode, int Operand) const;

  bool isProfitableToSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const override;

  TTI::MemCmpExpansionOptions
  enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override;
};

} // end namespace llvm

#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H