[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 (reapplied)

As reported on PR26235, we don't currently make use of the VBROADCASTF128/VBROADCASTI128 instructions (or the AVX512 equivalents) to load+splat a 128-bit vector to both lanes of a 256-bit vector. This patch enables lowering from subvector insertion/concatenation patterns and auto-upgrades the llvm.x86.avx.vbroadcastf128.pd.256 / llvm.x86.avx.vbroadcastf128.ps.256 intrinsics to match. We could possibly investigate using VBROADCASTF128/VBROADCASTI128 to load repeated constants as well (similar to how we already do for scalar broadcasts). Reapplied with fix for PR28657 - removed intrinsic definitions (clang companion patch to be submitted shortly). Differential Revision: https://reviews.llvm.org/D22460 llvm-svn: 276416
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-22 13:58:44 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2016-07-22 13:58:44 +0000
commit: ea0d4f9962fbc1741a730ec74b655940ea15424b (patch)
tree: 47e47fd7351dc3615c9c33add8a694638cec173d /llvm/lib/IR/AutoUpgrade.cpp
parent: 22c9e931470fea2e25bef1f52128e54ec96da403 (diff)
download: llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.zip
llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.tar.gz
llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.tar.bz2
1 files changed, 14 insertions, 7 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f8..a8145b6 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -296,6 +296,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
          Name.startswith("avx.blend.p") ||
          Name == "avx2.pblendw" ||
          Name.startswith("avx2.pblendd.") ||
+         Name.startswith("avx.vbroadcastf128") ||
          Name == "avx2.vbroadcasti128" ||
          Name == "xop.vpcmov" ||
          (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
@@ -886,7 +887,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
       Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
       Rep = Builder.CreateZExt(Rep, CI->getType(), "");
-    } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+    } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
       // Replace broadcasts with a series of insertelements.
       Type *VecTy = CI->getType();
       Type *EltTy = VecTy->getVectorElementType();
@@ -918,15 +919,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       bool DoSext = (StringRef::npos != Name.find("pmovsx"));
       Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
                    : Builder.CreateZExt(SV, DstTy);
-    } else if (IsX86 && Name == "avx2.vbroadcasti128") {
-      // Replace vbroadcasts with a vector shuffle.
-      Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+    } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+                         Name == "avx2.vbroadcasti128")) {
+      // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+      Type *EltTy = CI->getType()->getVectorElementType();
+      unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+      Type *VT = VectorType::get(EltTy, NumSrcElts);
       Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
                                             PointerType::getUnqual(VT));
       Value *Load = Builder.CreateLoad(VT, Op);
-      uint32_t Idxs[4] = { 0, 1, 0, 1 };
-      Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
-                                        Idxs);
+      if (NumSrcElts == 2)
+        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+                                          { 0, 1, 0, 1 });
+      else
+        Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+                                          { 0, 1, 2, 3, 0, 1, 2, 3 });
     } else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
                          Name.startswith("avx2.vbroadcast") ||
                          Name.startswith("avx512.pbroadcast") ||
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-07-22 13:58:44 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2016-07-22 13:58:44 +0000
commit	ea0d4f9962fbc1741a730ec74b655940ea15424b (patch)
tree	47e47fd7351dc3615c9c33add8a694638cec173d /llvm/lib/IR/AutoUpgrade.cpp
parent	22c9e931470fea2e25bef1f52128e54ec96da403 (diff)
download	llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.zip llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.tar.gz llvm-ea0d4f9962fbc1741a730ec74b655940ea15424b.tar.bz2