author     Simon Pilgrim <llvm-dev@redking.me.uk>  2016-07-22 13:58:44 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>  2016-07-22 13:58:44 +0000
commit     ea0d4f9962fbc1741a730ec74b655940ea15424b (patch)
tree       47e47fd7351dc3615c9c33add8a694638cec173d /llvm/lib/IR/AutoUpgrade.cpp
parent     22c9e931470fea2e25bef1f52128e54ec96da403 (diff)
[X86][AVX] Added support for lowering to VBROADCASTF128/VBROADCASTI128 (reapplied)
As reported on PR26235, we don't currently make use of the VBROADCASTF128/VBROADCASTI128 instructions (or the AVX512 equivalents) to load+splat a 128-bit vector to both lanes of a 256-bit vector.

This patch enables lowering from subvector insertion/concatenation patterns and auto-upgrades the llvm.x86.avx.vbroadcastf128.pd.256 / llvm.x86.avx.vbroadcastf128.ps.256 intrinsics to match.

We could possibly investigate using VBROADCASTF128/VBROADCASTI128 to load repeated constants as well (similar to how we already do for scalar broadcasts).

Reapplied with fix for PR28657 - removed intrinsic definitions (clang companion patch to be submitted shortly).

Differential Revision: https://reviews.llvm.org/D22460

llvm-svn: 276416
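For illustration only (not part of the patch): a minimal sketch of the load+shuffle sequence that the auto-upgrade emits for the .ps.256 variant, written against the same IRBuilder calls the diff below uses. The helper name emitBroadcastF128PS and its parameters are hypothetical.

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/IRBuilder.h"

    using namespace llvm;

    // Hypothetical helper: given the intrinsic's pointer operand, emit the
    // 128-bit load plus lane-repeating shuffle that replaces a call to
    // llvm.x86.avx.vbroadcastf128.ps.256.
    static Value *emitBroadcastF128PS(IRBuilder<> &Builder, Value *SrcPtr) {
      LLVMContext &C = Builder.getContext();
      // The 128-bit source type: <4 x float>.
      VectorType *SrcTy = VectorType::get(Type::getFloatTy(C), 4);
      // Reinterpret the operand as a <4 x float>* and load the 128 bits.
      Value *Ptr =
          Builder.CreatePointerCast(SrcPtr, PointerType::getUnqual(SrcTy));
      Value *Load = Builder.CreateLoad(SrcTy, Ptr);
      // Repeat the four source lanes in both 128-bit halves of the result,
      // producing an <8 x float> splat of the loaded subvector.
      uint32_t Mask[8] = {0, 1, 2, 3, 0, 1, 2, 3};
      return Builder.CreateShuffleVector(Load, UndefValue::get(SrcTy), Mask);
    }

The .pd.256 variant would load a <2 x double> and use the mask <0,1,0,1> instead, which is why the patch below derives NumSrcElts from the element size rather than hard-coding a 2-element mask.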
Diffstat (limited to 'llvm/lib/IR/AutoUpgrade.cpp')
-rw-r--r--  llvm/lib/IR/AutoUpgrade.cpp | 21
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f8..a8145b6 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -296,6 +296,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name.startswith("avx.blend.p") ||
Name == "avx2.pblendw" ||
Name.startswith("avx2.pblendd.") ||
+ Name.startswith("avx.vbroadcastf128") ||
Name == "avx2.vbroadcasti128" ||
Name == "xop.vpcmov" ||
(Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
@@ -886,7 +887,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+ } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
@@ -918,15 +919,21 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
- } else if (IsX86 && Name == "avx2.vbroadcasti128") {
- // Replace vbroadcasts with a vector shuffle.
- Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ Name == "avx2.vbroadcasti128")) {
+ // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+ Type *EltTy = CI->getType()->getVectorElementType();
+ unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+ Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
Value *Load = Builder.CreateLoad(VT, Op);
- uint32_t Idxs[4] = { 0, 1, 0, 1 };
- Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- Idxs);
+ if (NumSrcElts == 2)
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 0, 1 });
+ else
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||