aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
diff options
context:
space:
mode:
author Chandler Carruth <chandlerc@gmail.com> 2014-09-25 00:24:19 +0000
committer Chandler Carruth <chandlerc@gmail.com> 2014-09-25 00:24:19 +0000
commit 98443d89b964b3b9a7a3c61d4dd5f3194a5604b6 (patch)
tree ce871a5ee83614fa2abe626c050a644db687b04e /llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
parent 38cd67624b25d0ffd10e7d4ff9665789afcf3240 (diff)
download llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.zip
llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.gz
llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.bz2
[x86] Implement v16i16 support with AVX2 in the new vector shuffle lowering.

This also implements the fancy blend lowering for v16i16 using AVX2 and teaches the X86 backend to print shuffle masks for 256-bit PSHUFB and PBLENDW instructions. It also makes the mask decoding correct for PBLENDW instructions. The yaks, they are legion.

Tests are updated accordingly. There are some missing tests for the VBLENDVB lowering, but I'll add those in a follow-up as this commit has accumulated enough cruft already.

llvm-svn: 218430
Diffstat (limited to 'llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp')
-rw-r--r-- llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 15
1 files changed, 11 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 713e147..a3f4523 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -301,11 +301,18 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
}
}
-void DecodeBLENDMask(MVT VT, unsigned Imm,
- SmallVectorImpl<int> &ShuffleMask) {
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+ int ElementBits = VT.getScalarSizeInBits();
int NumElements = VT.getVectorNumElements();
- for (int i = 0; i < NumElements; ++i)
- ShuffleMask.push_back(((Imm >> i) & 1) ? NumElements + i : i);
+ for (int i = 0; i < NumElements; ++i) {
+ // If there are more than 8 elements in the vector, then any immediate blend
+ // mask applies to each 128-bit lane. There can never be more than
+ // 8 elements in a 128-bit lane with an immediate blend.
+ int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+ assert(Bit < 8 &&
+ "Immediate blends only operate over 8 elements at a time!");
+ ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+ }
}
/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.