diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 82 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 53 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 3 |
4 files changed, 141 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 20fc849..dd233e2 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -657,6 +657,15 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
 
+  // Custom lower VP loads and stores on 64-bit ISA 3.0 subtargets (see
+  // LowerVP_LOAD and LowerVP_STORE).
+  if (Subtarget.isISA3_0() && isPPC64) {
+    for (MVT VT : {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1}) {
+      setOperationAction(ISD::VP_STORE, VT, Custom);
+      setOperationAction(ISD::VP_LOAD, VT, Custom);
+    }
+  }
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
@@ -11917,6 +11926,75 @@ SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
   return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
 }
 
+/// Scale the element count \p Val of a load/store with length into the form
+/// the instruction expects: multiply by the element size in bytes (\p Bits is
+/// the element width in bits) and, when \p Left is set, additionally move the
+/// low byte of the result into the most significant byte of the value for
+/// instructions that require a left justified length.
+static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
+                            SelectionDAG &DAG) {
+  // Sub-byte or non-power-of-two element widths would make the shift amount
+  // computed below meaningless (countr_zero of 0 is the full bit width).
+  assert(Bits >= 8 && llvm::has_single_bit<uint32_t>(Bits) &&
+         "Unexpected element width");
+  SDLoc dl(Val);
+  // Use the type of the value itself, not of result 0 of its defining node.
+  EVT VT = Val.getValueType();
+  unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
+  unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
+  SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
+  return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
+}
+
+/// Lower an ISD::VP_LOAD with an all-ones mask to a load-with-length
+/// intrinsic: ppc_vsx_lxvrl if the subtarget reports isISAFuture(), else
+/// ppc_vsx_lxvl. The vector length operand is rescaled by AdjustLength.
+SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
+  auto *VPLD = cast<VPLoadSDNode>(Op);
+  bool Future = Subtarget.isISAFuture();
+  SDLoc dl(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(VPLD->getMask().getNode(), true) &&
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue Len =
+      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getVectorLength());
+  unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
+  unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
+  // The non-Future instruction takes its length left justified.
+  Len = AdjustLength(Len, EltBits, !Future, DAG);
+  SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+                   VPLD->getBasePtr(), Len};
+  SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
+                                 VPLD->getMemoryVT(), VPLD->getMemOperand());
+}
+
+/// Lower an ISD::VP_STORE with an all-ones mask to a store-with-length
+/// intrinsic: ppc_vsx_stxvrl if the subtarget reports isISAFuture(), else
+/// ppc_vsx_stxvl. The stored value is bitcast to v4i32 for the intrinsic.
+SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
+  auto *VPST = cast<VPStoreSDNode>(Op);
+  bool Future = Subtarget.isISAFuture();
+  SDLoc dl(Op);
+  assert(ISD::isConstantSplatVectorAllOnes(VPST->getMask().getNode(), true) &&
+         "Mask predication not supported");
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue Len =
+      DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getVectorLength());
+  unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
+  unsigned EltBits =
+      VPST->getValue().getValueType().getScalarType().getSizeInBits();
+  // The non-Future instruction takes its length left justified.
+  Len = AdjustLength(Len, EltBits, !Future, DAG);
+  SDValue Ops[] = {
+      VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
+      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getValue()),
+      VPST->getBasePtr(), Len};
+  SDVTList Tys = DAG.getVTList(MVT::Other);
+  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
+                                 VPST->getMemoryVT(), VPST->getMemOperand());
+}
+
 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -12771,6 +12849,10 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     if (Op->getFlags().hasNoFPExcept())
       return Op;
     return SDValue();
+  case ISD::VP_LOAD:
+    return LowerVP_LOAD(Op, DAG);
+  case ISD::VP_STORE:
+    return LowerVP_STORE(Op, DAG);
   }
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 880aca7..d967018 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1345,6 +1345,9 @@ namespace llvm {
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const;
+
     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b04e887..e74f1bd 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
 
 #define DEBUG_TYPE "ppctti"
 
+static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
+                             cl::desc("Allow vp.load and vp.store for pwr9"),
+                             cl::init(false), cl::Hidden);
+
 static cl::opt<bool> VecMaskCost("ppc-vec-mask-cost",
 cl::desc("add masking cost for i1 vectors"), cl::init(true), cl::Hidden);
 
@@ -1031,3 +1035,52 @@ bool PPCTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
 bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
   return TLI->supportsTailCallFor(CB);
 }
+
+/// Target hook used by CodeGen to decide whether to expand vector predication
+/// intrinsics into scalar operations or to use special ISD nodes to represent
+/// them. The Target will not see the intrinsics.
+///
+/// vp.load and vp.store are reported as legal only when all of the following
+/// hold; every other case defers to the base implementation:
+///  - the CPU directive is DIR_PWR10 or DIR_PWR_FUTURE, or it is DIR_PWR9 and
+///    -ppc-pwr9-evl is set;
+///  - the subtarget is 64-bit;
+///  - the vector value type is v2i64, v4i32, v8i16 or v16i8;
+///  - the mask operand is a splat of a constant all-ones value.
+TargetTransformInfo::VPLegalization
+PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
+  using VPLegalization = TargetTransformInfo::VPLegalization;
+  unsigned Directive = ST->getCPUDirective();
+  VPLegalization DefaultLegalization = BaseT::getVPLegalizationStrategy(PI);
+  if (Directive != PPC::DIR_PWR10 && Directive != PPC::DIR_PWR_FUTURE &&
+      (!Pwr9EVL || Directive != PPC::DIR_PWR9))
+    return DefaultLegalization;
+
+  if (!ST->isPPC64())
+    return DefaultLegalization;
+
+  unsigned IID = PI.getIntrinsicID();
+  if (IID != Intrinsic::vp_load && IID != Intrinsic::vp_store)
+    return DefaultLegalization;
+
+  bool IsLoad = IID == Intrinsic::vp_load;
+  // The vector type is the result of vp.load or the first operand of vp.store.
+  Type *VecTy = IsLoad ? PI.getType() : PI.getOperand(0)->getType();
+  EVT VT = TLI->getValueType(DL, VecTy, true);
+  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 &&
+      VT != MVT::v16i8)
+    return DefaultLegalization;
+
+  auto IsAllTrueMask = [](Value *MaskVal) {
+    if (Value *SplattedVal = getSplatValue(MaskVal))
+      if (auto *ConstValue = dyn_cast<Constant>(SplattedVal))
+        return ConstValue->isAllOnesValue();
+    return false;
+  };
+  // The mask is operand 1 of vp.load and operand 2 of vp.store.
+  unsigned MaskIx = IsLoad ? 1 : 2;
+  if (!IsAllTrueMask(PI.getOperand(MaskIx)))
+    return DefaultLegalization;
+
+  return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 8d7f255..f80ebdb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -150,6 +150,9 @@ public:
                              ArrayRef<Type *> Types) const override;
   bool supportsTailCallFor(const CallBase *CB) const override;
 
+  TargetTransformInfo::VPLegalization
+  getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+
 private:
   // The following constant is used for estimating costs on power9.
   static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
