aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp')
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp311
1 files changed, 306 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 2462ec3..22d1165 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -20,8 +20,7 @@
//
// TODO List:
//
-// Future loop memory idioms to recognize:
-// memcmp, strlen, etc.
+// Future loop memory idioms to recognize: memcmp, etc.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
@@ -33,6 +32,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -97,6 +97,7 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores");
+STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads");
STATISTIC(
NumShiftUntilBitTest,
"Number of uncountable loops recognized as 'shift until bitttest' idiom");
@@ -126,6 +127,26 @@ static cl::opt<bool, true>
cl::location(DisableLIRP::Memcpy), cl::init(false),
cl::ReallyHidden);
+bool DisableLIRP::Strlen;
+static cl::opt<bool, true>
+ DisableLIRPStrlen("disable-loop-idiom-strlen",
+ cl::desc("Proceed with loop idiom recognize pass, but do "
+ "not convert loop(s) to strlen."),
+ cl::location(DisableLIRP::Strlen), cl::init(false),
+ cl::ReallyHidden);
+
+/// Some target libraries have a significant call overhead for `wcslen`,
+/// which can degrade performance when the input string is not long enough
+/// to justify the cost. To avoid unnecessary performance penalties,
+/// we disable it by default.
+bool DisableLIRP::Wcslen;
+static cl::opt<bool, true>
+ EnableLIRPWcslen("enable-loop-idiom-wcslen",
+ cl::desc("Proceed with loop idiom recognize pass, "
+ "enable conversion of loop(s) to wcslen."),
+ cl::location(DisableLIRP::Wcslen), cl::init(true),
+ cl::ReallyHidden);
+
static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling "
@@ -246,6 +267,7 @@ private:
bool recognizeShiftUntilBitTest();
bool recognizeShiftUntilZero();
+ bool recognizeAndInsertStrLen();
/// @}
};
@@ -1494,7 +1516,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount() || recognizeAndInsertFFS() ||
recognizeShiftUntilBitTest() || recognizeShiftUntilZero() ||
- recognizeShiftUntilLessThan();
+ recognizeShiftUntilLessThan() || recognizeAndInsertStrLen();
+}
+
+/// Check if a Value is either a nullptr or a constant int zero
+static bool isZeroConstant(const Value *Val) {
+ if (isa<ConstantPointerNull>(Val))
+ return true;
+ const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val);
+ if (!CmpZero || !CmpZero->isZero())
+ return false;
+ return true;
}
/// Check if the given conditional branch is based on the comparison between
@@ -1512,8 +1544,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
if (!Cond)
return nullptr;
- ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
- if (!CmpZero || !CmpZero->isZero())
+ if (!isZeroConstant(Cond->getOperand(1)))
return nullptr;
BasicBlock *TrueSucc = BI->getSuccessor(0);
@@ -1529,6 +1560,276 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry,
return nullptr;
}
+namespace {
+
+class StrlenVerifier {
+public:
+ explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE,
+ const TargetLibraryInfo *TLI)
+ : CurLoop(CurLoop), SE(SE), TLI(TLI) {}
+
+ bool isValidStrlenIdiom() {
+ // Give up if the loop has multiple blocks, multiple backedges, or
+ // multiple exit blocks
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 ||
+ !CurLoop->getUniqueExitBlock())
+ return false;
+
+ // It should have a preheader and a branch instruction.
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator());
+ if (!EntryBI)
+ return false;
+
+ // The loop exit must be conditioned on an icmp with 0 the null terminator.
+ // The icmp operand has to be a load on some SSA reg that increments
+ // by 1 in the loop.
+ BasicBlock *LoopBody = *CurLoop->block_begin();
+
+ // Skip if the body is too big as it most likely is not a strlen idiom.
+ if (!LoopBody || LoopBody->size() >= 15)
+ return false;
+
+ BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator());
+ Value *LoopCond = matchCondition(LoopTerm, LoopBody);
+ if (!LoopCond)
+ return false;
+
+ LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond);
+ if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0)
+ return false;
+
+ OperandType = LoopLoad->getType();
+ if (!OperandType || !OperandType->isIntegerTy())
+ return false;
+
+ // See if the pointer expression is an AddRec with constant step a of form
+ // ({n,+,a}) where a is the width of the char type.
+ Value *IncPtr = LoopLoad->getPointerOperand();
+ const SCEVAddRecExpr *LoadEv =
+ dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr));
+ if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
+ return false;
+ LoadBaseEv = LoadEv->getStart();
+
+ LLVM_DEBUG({
+ dbgs() << "pointer load scev: ";
+ LoadEv->print(outs());
+ dbgs() << "\n";
+ });
+
+ const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE));
+ if (!Step)
+ return false;
+
+ unsigned StepSize = 0;
+ StepSizeCI = dyn_cast<ConstantInt>(Step->getValue());
+ if (!StepSizeCI)
+ return false;
+ StepSize = StepSizeCI->getZExtValue();
+
+ // Verify that StepSize is consistent with platform char width.
+ OpWidth = OperandType->getIntegerBitWidth();
+ unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule());
+ if (OpWidth != StepSize * 8)
+ return false;
+ if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32)
+ return false;
+ if (OpWidth >= 16)
+ if (OpWidth != WcharSize * 8)
+ return false;
+
+ // Scan every instruction in the loop to ensure there are no side effects.
+ for (Instruction &I : *LoopBody)
+ if (I.mayHaveSideEffects())
+ return false;
+
+ BasicBlock *LoopExitBB = CurLoop->getExitBlock();
+ if (!LoopExitBB)
+ return false;
+
+ for (PHINode &PN : LoopExitBB->phis()) {
+ if (!SE->isSCEVable(PN.getType()))
+ return false;
+
+ const SCEV *Ev = SE->getSCEV(&PN);
+ if (!Ev)
+ return false;
+
+ LLVM_DEBUG({
+ dbgs() << "loop exit phi scev: ";
+ Ev->print(dbgs());
+ dbgs() << "\n";
+ });
+
+ // Since we verified that the loop trip count will be a valid strlen
+ // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use
+ // SCEVExpander materialize the loop output.
+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
+ if (!AddRecEv || !AddRecEv->isAffine())
+ return false;
+
+ // We only want RecAddExpr with recurrence step that is constant. This
+ // is good enough for all the idioms we want to recognize. Later we expand
+ // and materialize the recurrence as {base,+,a} -> (base + a * strlen)
+ if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)))
+ return false;
+ }
+
+ return true;
+ }
+
+public:
+ const Loop *CurLoop;
+ ScalarEvolution *SE;
+ const TargetLibraryInfo *TLI;
+
+ unsigned OpWidth;
+ ConstantInt *StepSizeCI;
+ const SCEV *LoadBaseEv;
+ Type *OperandType;
+};
+
+} // namespace
+
+/// The Strlen Idiom we are trying to detect has the following structure
+///
+/// preheader:
+/// ...
+/// br label %body, ...
+///
+/// body:
+/// ... ; %0 is incremented by a gep
+/// %1 = load i8, ptr %0, align 1
+/// %2 = icmp eq i8 %1, 0
+/// br i1 %2, label %exit, label %body
+///
+/// exit:
+/// %lcssa = phi [%0, %body], ...
+///
+/// We expect the strlen idiom to have a load of a character type that
+/// is compared against '\0', and such load pointer operand must have scev
+/// expression of the form {%str,+,c} where c is a ConstantInt of the
+/// appropiate character width for the idiom, and %str is the base of the string
+/// And, that all lcssa phis have the form {...,+,n} where n is a constant,
+///
+/// When transforming the output of the strlen idiom, the lccsa phi are
+/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen)
+/// and all subsequent uses are replaced. For example,
+///
+/// \code{.c}
+/// const char* base = str;
+/// while (*str != '\0')
+/// ++str;
+/// size_t result = str - base;
+/// \endcode
+///
+/// will be transformed as follows: The idiom will be replaced by a strlen
+/// computation to compute the address of the null terminator of the string.
+///
+/// \code{.c}
+/// const char* base = str;
+/// const char* end = base + strlen(str);
+/// size_t result = end - base;
+/// \endcode
+///
+/// In the case we index by an induction variable, as long as the induction
+/// variable has a constant int increment, we can replace all such indvars
+/// with the closed form computation of strlen
+///
+/// \code{.c}
+/// size_t i = 0;
+/// while (str[i] != '\0')
+/// ++i;
+/// size_t result = i;
+/// \endcode
+///
+/// Will be replaced by
+///
+/// \code{.c}
+/// size_t i = 0 + strlen(str);
+/// size_t result = i;
+/// \endcode
+///
+bool LoopIdiomRecognize::recognizeAndInsertStrLen() {
+ if (DisableLIRP::All)
+ return false;
+
+ StrlenVerifier Verifier(CurLoop, SE, TLI);
+
+ if (!Verifier.isValidStrlenIdiom())
+ return false;
+
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
+ BasicBlock *LoopExitBB = CurLoop->getExitBlock();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+ SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(),
+ "strlen_idiom");
+ Value *MaterialzedBase = Expander.expandCodeFor(
+ Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(),
+ Builder.GetInsertPoint());
+
+ Value *StrLenFunc = nullptr;
+ if (Verifier.OpWidth == 8) {
+ if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen))
+ return false;
+ StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI);
+ } else {
+ if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen) &&
+ !DisableLIRP::Wcslen)
+ return false;
+ StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI);
+ }
+ assert(StrLenFunc && "Failed to emit strlen function.");
+
+ const SCEV *StrlenEv = SE->getSCEV(StrLenFunc);
+ SmallVector<PHINode *, 4> Cleanup;
+ for (PHINode &PN : LoopExitBB->phis()) {
+ // We can now materialize the loop output as all phi have scev {base,+,a}.
+ // We expand the phi as:
+ // %strlen = call i64 @strlen(%str)
+ // %phi.new = base expression + step * %strlen
+ const SCEV *Ev = SE->getSCEV(&PN);
+ const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev);
+ const SCEVConstant *Step =
+ dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE));
+ const SCEV *Base = AddRecEv->getStart();
+
+ // It is safe to truncate to base since if base is narrower than size_t
+ // the equivalent user code will have to truncate anyways.
+ const SCEV *NewEv = SE->getAddExpr(
+ Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend(
+ StrlenEv, Base->getType())));
+
+ Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(),
+ Builder.GetInsertPoint());
+ Expander.clear();
+ PN.replaceAllUsesWith(MaterializedPHI);
+ Cleanup.push_back(&PN);
+ }
+
+ // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned
+ // up by later passes
+ for (PHINode *PN : Cleanup)
+ RecursivelyDeleteDeadPHINode(PN);
+ SE->forgetLoop(CurLoop);
+
+ ++NumStrLen;
+ LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n");
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen",
+ CurLoop->getStartLoc(), Preheader)
+ << "Transformed " << StrLenFunc->getName() << " loop idiom";
+ });
+
+ return true;
+}
+
/// Check if the given conditional branch is based on an unsigned less-than
/// comparison between a variable and a constant, and if the comparison is false
/// the control yields to the loop entry. If the branch matches the behaviour,