diff options
author | Yeting Kuo <yeting.kuo@sifive.com> | 2022-12-13 17:41:46 +0800 |
---|---|---|
committer | Yeting Kuo <yeting.kuo@sifive.com> | 2022-12-14 09:47:44 +0800 |
commit | ad68586a37e8e23eeb269d03bd28eb46e16bf48c (patch) | |
tree | e529235de40573d0654bd61f8baa49d9ae1bc790 /llvm/lib/CodeGen | |
parent | 69c984b6b803f00371dcf028bc9cf9b07911d1d6 (diff) | |
download | llvm-ad68586a37e8e23eeb269d03bd28eb46e16bf48c.zip llvm-ad68586a37e8e23eeb269d03bd28eb46e16bf48c.tar.gz llvm-ad68586a37e8e23eeb269d03bd28eb46e16bf48c.tar.bz2 |
[VP][RISCV] Add vp.ctpop and RISC-V support.
The patch also adds expandVPCTPOP in TargetLowering to expand VP_CTPOP nodes.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D139920
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 57 |
3 files changed, 65 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index b66e710..eb00d3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -800,6 +800,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
       return;
     }
     break;
+  case ISD::VP_CTPOP:
+    if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
+      Results.push_back(Expanded);
+      return;
+    }
+    break;
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
     if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 790cba9..3bec579 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1020,6 +1020,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::CTPOP:
+  case ISD::VP_CTPOP:
   case ISD::FABS: case ISD::VP_FABS:
   case ISD::FCEIL:
   case ISD::VP_FCEIL:
@@ -4098,6 +4099,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:
   case ISD::CTPOP:
+  case ISD::VP_CTPOP:
   case ISD::CTTZ:
   case ISD::CTTZ_ZERO_UNDEF:
   case ISD::FNEG: case ISD::VP_FNEG:
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5ad4eeb..202178e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8229,6 +8229,63 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
                      DAG.getConstant(Len - 8, dl, ShVT));
 }
 
+/// Expand a VP_CTPOP node into VP shift/and/sub/add/mul nodes, forwarding the
+/// node's mask (operand 1) and vector length (operand 2) to each of them.
+/// Returns an empty SDValue if the element width is over 128 bits or is not a
+/// multiple of 8 bits.
+SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
+  SDLoc dl(Node);
+  EVT VT = Node->getValueType(0);
+  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+  SDValue Op = Node->getOperand(0);
+  SDValue Mask = Node->getOperand(1);
+  SDValue VL = Node->getOperand(2);
+  unsigned Len = VT.getScalarSizeInBits();
+  assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
+
+  // TODO: Add support for irregular type lengths.
+  if (!(Len <= 128 && Len % 8 == 0))
+    return SDValue();
+
+  // This is the same algorithm as expandCTPOP, taken from
+  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  SDValue Mask55 =
+      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+  SDValue Mask33 =
+      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
+  SDValue Mask0F =
+      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+
+  // v = v - ((v >> 1) & 0x55555555...)
+  SDValue Shr1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+                             DAG.getConstant(1, dl, ShVT), Mask, VL);
+  SDValue Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT, Shr1, Mask55, Mask, VL);
+  Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
+
+  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+  SDValue Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
+  SDValue Shr2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+                             DAG.getConstant(2, dl, ShVT), Mask, VL);
+  SDValue Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Shr2, Mask33, Mask, VL);
+  Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
+
+  // v = (v + (v >> 4)) & 0x0F0F0F0F...
+  SDValue Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
+                             DAG.getConstant(4, dl, ShVT), Mask, VL);
+  SDValue Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
+  Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
+
+  if (Len <= 8)
+    return Op;
+
+  // v = (v * 0x01010101...) >> (Len - 8)
+  SDValue Mask01 =
+      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+  return DAG.getNode(ISD::VP_LSHR, dl, VT,
+                     DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
+                     DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
+}
+
 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
   SDLoc dl(Node);
   EVT VT = Node->getValueType(0);