[x86] Implement v16i16 support with AVX2 in the new vector shuffle lowering.

This also implements the fancy blend lowering for v16i16 using AVX2 and teaches the X86 backend to print shuffle masks for 256-bit PSHUFB and PBLENDW instructions. It also makes the mask decoding correct for PBLENDW instructions. The yaks, they are legion.

Tests are updated accordingly. There are some missing tests for the VBLENDVB lowering, but I'll add those in a follow-up as this commit has accumulated enough cruft already.

llvm-svn: 218430
author: Chandler Carruth <chandlerc@gmail.com> 2014-09-25 00:24:19 +0000
committer: Chandler Carruth <chandlerc@gmail.com> 2014-09-25 00:24:19 +0000
commit: 98443d89b964b3b9a7a3c61d4dd5f3194a5604b6 (patch)
tree: ce871a5ee83614fa2abe626c050a644db687b04e /llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
parent: 38cd67624b25d0ffd10e7d4ff9665789afcf3240 (diff)
download: llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.zip
llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.gz
llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.bz2
1 file changed, 11 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 713e147..a3f4523 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -301,11 +301,18 @@ void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask,
   }
 }
 
-void DecodeBLENDMask(MVT VT, unsigned Imm,
-                       SmallVectorImpl<int> &ShuffleMask) {
+void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
+  int ElementBits = VT.getScalarSizeInBits();
   int NumElements = VT.getVectorNumElements();
-  for (int i = 0; i < NumElements; ++i)
-    ShuffleMask.push_back(((Imm >> i) & 1) ? NumElements + i : i);
+  for (int i = 0; i < NumElements; ++i) {
+    // If there are more than 8 elements in the vector, then any immediate blend
+    // mask applies to each 128-bit lane. There can never be more than
+    // 8 elements in a 128-bit lane with an immediate blend.
+    int Bit = NumElements > 8 ? i % (128 / ElementBits) : i;
+    assert(Bit < 8 &&
+           "Immediate blends only operate over 8 elements at a time!");
+    ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElements + i : i);
+  }
 }
 
 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
author	Chandler Carruth <chandlerc@gmail.com>	2014-09-25 00:24:19 +0000
committer	Chandler Carruth <chandlerc@gmail.com>	2014-09-25 00:24:19 +0000
commit	98443d89b964b3b9a7a3c61d4dd5f3194a5604b6 (patch)
tree	ce871a5ee83614fa2abe626c050a644db687b04e /llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
parent	38cd67624b25d0ffd10e7d4ff9665789afcf3240 (diff)
download	llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.zip llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.gz llvm-98443d89b964b3b9a7a3c61d4dd5f3194a5604b6.tar.bz2