diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2014-09-25 00:24:19 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2014-09-25 00:24:19 +0000 |
commit | 98443d89b964b3b9a7a3c61d4dd5f3194a5604b6 (patch) | |
tree | ce871a5ee83614fa2abe626c050a644db687b04e /llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | |
parent | 38cd67624b25d0ffd10e7d4ff9665789afcf3240 (diff) | |
download | llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.zip llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.gz llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.bz2 |
[x86] Implement v16i16 support with AVX2 in the new vector shuffle
lowering.
This also implements the fancy blend lowering for v16i16 using AVX2 and
teaches the X86 backend to print shuffle masks for 256-bit PSHUFB
and PBLENDW instructions. It also makes the mask decoding correct for
PBLENDW instructions. The yaks, they are legion.
Tests are updated accordingly. There are some missing tests for the
VPBLENDVB lowering, but I'll add those in a follow-up as this commit has
accumulated enough cruft already.
llvm-svn: 218430
Diffstat (limited to 'llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp')
-rw-r--r-- | llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 15 |
1 file changed, 11 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 713e147..a3f4523 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -301,11 +301,18 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
   }
 }
 
-void DecodeBLENDMask(MVT VT, unsigned Imm,
-                     SmallVectorImpl<int> &ShuffleMask) {
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  int ElementBits = VT.getScalarSizeInBits();
   int NumElements = VT.getVectorNumElements();
-  for (int i = 0; i < NumElements; ++i)
-    ShuffleMask.push_back(((Imm >> i) & 1) ? NumElements + i : i);
+  for (int i = 0; i < NumElements; ++i) {
+    // If there are more than 8 elements in the vector, then any immediate blend
+    // mask applies to each 128-bit lane. There can never be more than
+    // 8 elements in a 128-bit lane with an immediate blend.
+    int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+    assert(Bit < 8 &&
+           "Immediate blends only operate over 8 elements at a time!");
+    ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+  }
 }
 
 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
```