diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 067ad6d..00095f0 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8463,6 +8463,50 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2, DAG.getConstant(InsertPSMask, MVT::i8)); } +/// \brief Try to lower a shuffle as a permute of the inputs followed by an +/// UNPCK instruction. +/// +/// This specifically targets cases where we end up with alternating between +/// the two inputs, and so can permute them into something that feeds a single +/// UNPCK instruction. +static SDValue lowerVectorShuffleAsUnpack(MVT VT, SDLoc DL, SDValue V1, + SDValue V2, ArrayRef<int> Mask, + SelectionDAG &DAG) { + assert(!isSingleInputShuffleMask(Mask) && + "This routine should only be used when blending two inputs."); + assert(Mask.size() >= 2 && "Single element masks are invalid."); + + int Size = Mask.size(); + + int NumLoInputs = std::count_if(Mask.begin(), Mask.end(), [Size](int M) { + return M >= 0 && M % Size < Size / 2; + }); + int NumHiInputs = std::count_if( + Mask.begin(), Mask.end(), [Size](int M) { return M % Size > Size / 2; }); + + bool UnpackLo = NumLoInputs >= NumHiInputs; + + SmallVector<int, 32> V1Mask(Mask.size(), -1); + SmallVector<int, 32> V2Mask(Mask.size(), -1); + for (int i = 0; i < Size; ++i) { + if (Mask[i] < 0) + continue; + + // We only handle the case where V1 feeds even mask slots and V2 feeds odd + // mask slots. We rely on canonicalization to ensure this is the case. + if ((i % 2 == 0) != (Mask[i] < Size)) + return SDValue(); + + SmallVectorImpl<int> &VMask = (i % 2 == 0) ? V1Mask : V2Mask; + VMask[i / 2 + (UnpackLo ? 0 : Size / 2)] = Mask[i] % Size; + } + + V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); + V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); + return DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, VT, V1, + V2); +} + /// \brief Handle lowering of 2-lane 64-bit floating point shuffles. /// /// This is the basis function for the 2-lane 64-bit shuffles as we have full @@ -8921,6 +8965,11 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2, Mask, DAG); + // Try to lower by permuting the inputs into an unpack instruction. + if (SDValue Unpack = + lowerVectorShuffleAsUnpack(MVT::v4i32, DL, V1, V2, Mask, DAG)) + return Unpack; + // We implement this with SHUFPS because it can blend from two vectors. // Because we're going to eventually use SHUFPS, we use SHUFPS even to build // up the inputs, bypassing domain shift penalties that we would encur if we @@ -9670,6 +9719,11 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2, Mask, DAG); + // Try to lower by permuting the inputs into an unpack instruction. + if (SDValue Unpack = + lowerVectorShuffleAsUnpack(MVT::v8i16, DL, V1, V2, Mask, DAG)) + return Unpack; + int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; |
