diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 83 |
1 files changed, 82 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 2c69878..a70d962 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -30,6 +31,7 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "vector-combine" STATISTIC(NumVecCmp, "Number of vector compares formed"); +STATISTIC(NumVecBO, "Number of vector binops formed"); static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) { // Match a cmp with extracted vector operands. @@ -76,6 +78,85 @@ static bool foldExtractCmp(Instruction &I, const TargetTransformInfo &TTI) { return true; } +/// Try to reduce extract element costs by converting scalar binops to vector +/// binops followed by extract. +static bool foldExtractBinop(Instruction &I, const TargetTransformInfo &TTI) { + // It is not safe to transform things like div, urem, etc. because we may + // create undefined behavior when executing those on unknown vector elements. + if (!isSafeToSpeculativelyExecute(&I)) + return false; + + // Match a scalar binop with extracted vector operands: + // bo (extelt X, C0), (extelt Y, C1) + Instruction *Ext0, *Ext1; + if (!match(&I, m_BinOp(m_Instruction(Ext0), m_Instruction(Ext1)))) + return false; + + Value *X, *Y; + uint64_t C0, C1; + if (!match(Ext0, m_ExtractElement(m_Value(X), m_ConstantInt(C0))) || + !match(Ext1, m_ExtractElement(m_Value(Y), m_ConstantInt(C1))) || + X->getType() != Y->getType()) + return false; + + // Check if using a vector binop would be cheaper. + Instruction::BinaryOps BOpcode = cast<BinaryOperator>(I).getOpcode(); + Type *ScalarTy = I.getType(); + Type *VecTy = X->getType(); + int ScalarBOCost = TTI.getArithmeticInstrCost(BOpcode, ScalarTy); + int VecBOCost = TTI.getArithmeticInstrCost(BOpcode, VecTy); + int Extract0Cost = TTI.getVectorInstrCost(Instruction::ExtractElement, + VecTy, C0); + int Extract1Cost = TTI.getVectorInstrCost(Instruction::ExtractElement, + VecTy, C1); + + // Handle a special case - if the extract indexes are the same, the + // replacement sequence does not require a shuffle. Unless the vector binop is + // much more expensive than the scalar binop, this eliminates an extract. + // Extra uses of the extracts mean that we include those costs in the + // vector total because those instructions will not be eliminated. + if (C0 == C1) { + assert(Extract0Cost == Extract1Cost && "Different costs for same extract?"); + int ExtractCost = Extract0Cost; + if (X != Y) { + int ScalarCost = ExtractCost + ExtractCost + ScalarBOCost; + int VecCost = VecBOCost + ExtractCost + + !Ext0->hasOneUse() * ExtractCost + + !Ext1->hasOneUse() * ExtractCost; + if (ScalarCost <= VecCost) + return false; + } else { + // Handle an extra-special case. If the 2 binop operands are identical, + // adjust the formulas to account for that: + // bo (extelt X, C), (extelt X, C) --> extelt (bo X, X), C + // The extra use charge allows for either the CSE'd pattern or an + // unoptimized form with identical values. + bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2) + : !Ext0->hasOneUse() || !Ext1->hasOneUse(); + int ScalarCost = ExtractCost + ScalarBOCost; + int VecCost = VecBOCost + ExtractCost + HasUseTax * ExtractCost; + if (ScalarCost <= VecCost) + return false; + } + + // bo (extelt X, C), (extelt Y, C) --> extelt (bo X, Y), C + ++NumVecBO; + IRBuilder<> Builder(&I); + Value *NewBO = Builder.CreateBinOp(BOpcode, X, Y); + if (auto *VecBOInst = dyn_cast<Instruction>(NewBO)) { + // All IR flags are safe to back-propagate because any potential poison + // created in unused vector elements is discarded by the extract. + VecBOInst->copyIRFlags(&I); + } + Value *Extract = Builder.CreateExtractElement(NewBO, Ext0->getOperand(1)); + I.replaceAllUsesWith(Extract); + return true; + } + + // TODO: Handle C0 != C1 by shuffling 1 of the operands. + return false; +} + /// This is the entry point for all transforms. Pass manager differences are /// handled in the callers of this function. static bool runImpl(Function &F, const TargetTransformInfo &TTI, @@ -92,7 +173,7 @@ static bool runImpl(Function &F, const TargetTransformInfo &TTI, // iteratively in this loop rather than waiting until the end. for (Instruction &I : make_range(BB.rbegin(), BB.rend())) { MadeChange |= foldExtractCmp(I, TTI); - // TODO: More transforms go here. + MadeChange |= foldExtractBinop(I, TTI); } } |