aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp83
1 files changed, 82 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 2c69878..a70d962 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -30,6 +31,7 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "vector-combine"
STATISTIC(NumVecCmp, "Number of vector compares formed");
+STATISTIC(NumVecBO, "Number of vector binops formed");
static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) {
// Match a cmp with extracted vector operands.
@@ -76,6 +78,85 @@ static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) {
return true;
}
+/// Try to reduce extract element costs by converting scalar binops to vector
+/// binops followed by extract.
+static bool foldExtractBinop(Instruction &I, const TargetTransformInfo &TTI) {
+ // It is not safe to transform things like div, urem, etc. because we may
+ // create undefined behavior when executing those on unknown vector elements.
+ if (!isSafeToSpeculativelyExecute(&I))
+ return false;
+
+ // Match a scalar binop with extracted vector operands:
+ // bo (extelt X, C0), (extelt Y, C1)
+ Instruction *Ext0, *Ext1;
+ if (!match(&I, m_BinOp(m_Instruction(Ext0), m_Instruction(Ext1))))
+ return false;
+
+ Value *X, *Y;
+ uint64_t C0, C1;
+ if (!match(Ext0, m_ExtractElement(m_Value(X), m_ConstantInt(C0))) ||
+ !match(Ext1, m_ExtractElement(m_Value(Y), m_ConstantInt(C1))) ||
+ X->getType() != Y->getType())
+ return false;
+
+ // Check if using a vector binop would be cheaper.
+ Instruction::BinaryOps BOpcode = cast<BinaryOperator>(I).getOpcode();
+ Type *ScalarTy = I.getType();
+ Type *VecTy = X->getType();
+ int ScalarBOCost = TTI.getArithmeticInstrCost(BOpcode, ScalarTy);
+ int VecBOCost = TTI.getArithmeticInstrCost(BOpcode, VecTy);
+ int Extract0Cost = TTI.getVectorInstrCost(Instruction::ExtractElement,
+ VecTy, C0);
+ int Extract1Cost = TTI.getVectorInstrCost(Instruction::ExtractElement,
+ VecTy, C1);
+
+ // Handle a special case - if the extract indexes are the same, the
+ // replacement sequence does not require a shuffle. Unless the vector binop is
+ // much more expensive than the scalar binop, this eliminates an extract.
+ // Extra uses of the extracts mean that we include those costs in the
+ // vector total because those instructions will not be eliminated.
+ if (C0 == C1) {
+ assert(Extract0Cost == Extract1Cost && "Different costs for same extract?");
+ int ExtractCost = Extract0Cost;
+ if (X != Y) {
+ int ScalarCost = ExtractCost + ExtractCost + ScalarBOCost;
+ int VecCost = VecBOCost + ExtractCost +
+ !Ext0->hasOneUse() * ExtractCost +
+ !Ext1->hasOneUse() * ExtractCost;
+ if (ScalarCost <= VecCost)
+ return false;
+ } else {
+ // Handle an extra-special case. If the 2 binop operands are identical,
+ // adjust the formulas to account for that:
+ // bo (extelt X, C), (extelt X, C) --> extelt (bo X, X), C
+ // The extra use charge allows for either the CSE'd pattern or an
+ // unoptimized form with identical values.
+ bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
+ : !Ext0->hasOneUse() || !Ext1->hasOneUse();
+ int ScalarCost = ExtractCost + ScalarBOCost;
+ int VecCost = VecBOCost + ExtractCost + HasUseTax * ExtractCost;
+ if (ScalarCost <= VecCost)
+ return false;
+ }
+
+ // bo (extelt X, C), (extelt Y, C) --> extelt (bo X, Y), C
+ ++NumVecBO;
+ IRBuilder<> Builder(&I);
+ Value *NewBO = Builder.CreateBinOp(BOpcode, X, Y);
+ if (auto *VecBOInst = dyn_cast<Instruction>(NewBO)) {
+ // All IR flags are safe to back-propagate because any potential poison
+ // created in unused vector elements is discarded by the extract.
+ VecBOInst->copyIRFlags(&I);
+ }
+ Value *Extract = Builder.CreateExtractElement(NewBO, Ext0->getOperand(1));
+ I.replaceAllUsesWith(Extract);
+ return true;
+ }
+
+ // TODO: Handle C0 != C1 by shuffling 1 of the operands.
+ return false;
+}
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, const TargetTransformInfo &TTI,
@@ -92,7 +173,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI,
// iteratively in this loop rather than waiting until the end.
for (Instruction &I : make_range(BB.rbegin(), BB.rend())) {
MadeChange |= foldExtractCmp(I, TTI);
- // TODO: More transforms go here.
+ MadeChange |= foldExtractBinop(I, TTI);
}
}