diff options
Diffstat (limited to 'llvm/lib/Transforms')
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 311 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 9 |
2 files changed, 315 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 2462ec3..22d1165 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -20,8 +20,7 @@ // // TODO List: // -// Future loop memory idioms to recognize: -// memcmp, strlen, etc. +// Future loop memory idioms to recognize: memcmp, etc. // // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). @@ -33,6 +32,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -97,6 +97,7 @@ using namespace llvm; STATISTIC(NumMemSet, "Number of memset's formed from loop stores"); STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); STATISTIC(NumMemMove, "Number of memmove's formed from loop load+stores"); +STATISTIC(NumStrLen, "Number of strlen's and wcslen's formed from loop loads"); STATISTIC( NumShiftUntilBitTest, "Number of uncountable loops recognized as 'shift until bitttest' idiom"); @@ -126,6 +127,26 @@ static cl::opt<bool, true> cl::location(DisableLIRP::Memcpy), cl::init(false), cl::ReallyHidden); +bool DisableLIRP::Strlen; +static cl::opt<bool, true> + DisableLIRPStrlen("disable-loop-idiom-strlen", + cl::desc("Proceed with loop idiom recognize pass, but do " + "not convert loop(s) to strlen."), + cl::location(DisableLIRP::Strlen), cl::init(false), + cl::ReallyHidden); + +/// Some target libraries have a significant call overhead for `wcslen`, +/// which can degrade performance when the input string is not long enough +/// to justify the cost. To avoid unnecessary performance penalties, +/// we disable it by default. +bool DisableLIRP::Wcslen; +static cl::opt<bool, true> + EnableLIRPWcslen("enable-loop-idiom-wcslen", + cl::desc("Proceed with loop idiom recognize pass, " + "enable conversion of loop(s) to wcslen."), + cl::location(DisableLIRP::Wcslen), cl::init(true), + cl::ReallyHidden); + static cl::opt<bool> UseLIRCodeSizeHeurs( "use-lir-code-size-heurs", cl::desc("Use loop idiom recognition code size heuristics when compiling " @@ -246,6 +267,7 @@ private: bool recognizeShiftUntilBitTest(); bool recognizeShiftUntilZero(); + bool recognizeAndInsertStrLen(); /// @} }; @@ -1494,7 +1516,17 @@ bool LoopIdiomRecognize::runOnNoncountableLoop() { return recognizePopcount() || recognizeAndInsertFFS() || recognizeShiftUntilBitTest() || recognizeShiftUntilZero() || - recognizeShiftUntilLessThan(); + recognizeShiftUntilLessThan() || recognizeAndInsertStrLen(); +} + +/// Check if a Value is either a nullptr or a constant int zero +static bool isZeroConstant(const Value *Val) { + if (isa<ConstantPointerNull>(Val)) + return true; + const ConstantInt *CmpZero = dyn_cast<ConstantInt>(Val); + if (!CmpZero || !CmpZero->isZero()) + return false; + return true; } /// Check if the given conditional branch is based on the comparison between @@ -1512,8 +1544,7 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, if (!Cond) return nullptr; - ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) + if (!isZeroConstant(Cond->getOperand(1))) return nullptr; BasicBlock *TrueSucc = BI->getSuccessor(0); @@ -1529,6 +1560,276 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry, return nullptr; } +namespace { + +class StrlenVerifier { +public: + explicit StrlenVerifier(const Loop *CurLoop, ScalarEvolution *SE, + const TargetLibraryInfo *TLI) + : CurLoop(CurLoop), SE(SE), TLI(TLI) {} + + bool isValidStrlenIdiom() { + // Give up if the loop has multiple blocks, multiple backedges, or + // multiple exit blocks + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1 || + !CurLoop->getUniqueExitBlock()) + return false; + + // It should have a preheader and a branch instruction. + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + if (!Preheader) + return false; + + BranchInst *EntryBI = dyn_cast<BranchInst>(Preheader->getTerminator()); + if (!EntryBI) + return false; + + // The loop exit must be conditioned on an icmp with 0 the null terminator. + // The icmp operand has to be a load on some SSA reg that increments + // by 1 in the loop. + BasicBlock *LoopBody = *CurLoop->block_begin(); + + // Skip if the body is too big as it most likely is not a strlen idiom. + if (!LoopBody || LoopBody->size() >= 15) + return false; + + BranchInst *LoopTerm = dyn_cast<BranchInst>(LoopBody->getTerminator()); + Value *LoopCond = matchCondition(LoopTerm, LoopBody); + if (!LoopCond) + return false; + + LoadInst *LoopLoad = dyn_cast<LoadInst>(LoopCond); + if (!LoopLoad || LoopLoad->getPointerAddressSpace() != 0) + return false; + + OperandType = LoopLoad->getType(); + if (!OperandType || !OperandType->isIntegerTy()) + return false; + + // See if the pointer expression is an AddRec with constant step a of form + // ({n,+,a}) where a is the width of the char type. + Value *IncPtr = LoopLoad->getPointerOperand(); + const SCEVAddRecExpr *LoadEv = + dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IncPtr)); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + LoadBaseEv = LoadEv->getStart(); + + LLVM_DEBUG({ + dbgs() << "pointer load scev: "; + LoadEv->print(outs()); + dbgs() << "\n"; + }); + + const SCEVConstant *Step = + dyn_cast<SCEVConstant>(LoadEv->getStepRecurrence(*SE)); + if (!Step) + return false; + + unsigned StepSize = 0; + StepSizeCI = dyn_cast<ConstantInt>(Step->getValue()); + if (!StepSizeCI) + return false; + StepSize = StepSizeCI->getZExtValue(); + + // Verify that StepSize is consistent with platform char width. + OpWidth = OperandType->getIntegerBitWidth(); + unsigned WcharSize = TLI->getWCharSize(*LoopLoad->getModule()); + if (OpWidth != StepSize * 8) + return false; + if (OpWidth != 8 && OpWidth != 16 && OpWidth != 32) + return false; + if (OpWidth >= 16) + if (OpWidth != WcharSize * 8) + return false; + + // Scan every instruction in the loop to ensure there are no side effects. + for (Instruction &I : *LoopBody) + if (I.mayHaveSideEffects()) + return false; + + BasicBlock *LoopExitBB = CurLoop->getExitBlock(); + if (!LoopExitBB) + return false; + + for (PHINode &PN : LoopExitBB->phis()) { + if (!SE->isSCEVable(PN.getType())) + return false; + + const SCEV *Ev = SE->getSCEV(&PN); + if (!Ev) + return false; + + LLVM_DEBUG({ + dbgs() << "loop exit phi scev: "; + Ev->print(dbgs()); + dbgs() << "\n"; + }); + + // Since we verified that the loop trip count will be a valid strlen + // idiom, we can expand all lcssa phi with {n,+,1} as (n + strlen) and use + // SCEVExpander materialize the loop output. + const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev); + if (!AddRecEv || !AddRecEv->isAffine()) + return false; + + // We only want RecAddExpr with recurrence step that is constant. This + // is good enough for all the idioms we want to recognize. Later we expand + // and materialize the recurrence as {base,+,a} -> (base + a * strlen) + if (!dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE))) + return false; + } + + return true; + } + +public: + const Loop *CurLoop; + ScalarEvolution *SE; + const TargetLibraryInfo *TLI; + + unsigned OpWidth; + ConstantInt *StepSizeCI; + const SCEV *LoadBaseEv; + Type *OperandType; +}; + +} // namespace + +/// The Strlen Idiom we are trying to detect has the following structure +/// +/// preheader: +/// ... +/// br label %body, ... +/// +/// body: +/// ... ; %0 is incremented by a gep +/// %1 = load i8, ptr %0, align 1 +/// %2 = icmp eq i8 %1, 0 +/// br i1 %2, label %exit, label %body +/// +/// exit: +/// %lcssa = phi [%0, %body], ... +/// +/// We expect the strlen idiom to have a load of a character type that +/// is compared against '\0', and such load pointer operand must have scev +/// expression of the form {%str,+,c} where c is a ConstantInt of the +/// appropiate character width for the idiom, and %str is the base of the string +/// And, that all lcssa phis have the form {...,+,n} where n is a constant, +/// +/// When transforming the output of the strlen idiom, the lccsa phi are +/// expanded using SCEVExpander as {base scev,+,a} -> (base scev + a * strlen) +/// and all subsequent uses are replaced. For example, +/// +/// \code{.c} +/// const char* base = str; +/// while (*str != '\0') +/// ++str; +/// size_t result = str - base; +/// \endcode +/// +/// will be transformed as follows: The idiom will be replaced by a strlen +/// computation to compute the address of the null terminator of the string. +/// +/// \code{.c} +/// const char* base = str; +/// const char* end = base + strlen(str); +/// size_t result = end - base; +/// \endcode +/// +/// In the case we index by an induction variable, as long as the induction +/// variable has a constant int increment, we can replace all such indvars +/// with the closed form computation of strlen +/// +/// \code{.c} +/// size_t i = 0; +/// while (str[i] != '\0') +/// ++i; +/// size_t result = i; +/// \endcode +/// +/// Will be replaced by +/// +/// \code{.c} +/// size_t i = 0 + strlen(str); +/// size_t result = i; +/// \endcode +/// +bool LoopIdiomRecognize::recognizeAndInsertStrLen() { + if (DisableLIRP::All) + return false; + + StrlenVerifier Verifier(CurLoop, SE, TLI); + + if (!Verifier.isValidStrlenIdiom()) + return false; + + BasicBlock *Preheader = CurLoop->getLoopPreheader(); + BasicBlock *LoopExitBB = CurLoop->getExitBlock(); + + IRBuilder<> Builder(Preheader->getTerminator()); + SCEVExpander Expander(*SE, Preheader->getModule()->getDataLayout(), + "strlen_idiom"); + Value *MaterialzedBase = Expander.expandCodeFor( + Verifier.LoadBaseEv, Verifier.LoadBaseEv->getType(), + Builder.GetInsertPoint()); + + Value *StrLenFunc = nullptr; + if (Verifier.OpWidth == 8) { + if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_strlen)) + return false; + StrLenFunc = emitStrLen(MaterialzedBase, Builder, *DL, TLI); + } else { + if (!isLibFuncEmittable(Preheader->getModule(), TLI, LibFunc_wcslen) && + !DisableLIRP::Wcslen) + return false; + StrLenFunc = emitWcsLen(MaterialzedBase, Builder, *DL, TLI); + } + assert(StrLenFunc && "Failed to emit strlen function."); + + const SCEV *StrlenEv = SE->getSCEV(StrLenFunc); + SmallVector<PHINode *, 4> Cleanup; + for (PHINode &PN : LoopExitBB->phis()) { + // We can now materialize the loop output as all phi have scev {base,+,a}. + // We expand the phi as: + // %strlen = call i64 @strlen(%str) + // %phi.new = base expression + step * %strlen + const SCEV *Ev = SE->getSCEV(&PN); + const SCEVAddRecExpr *AddRecEv = dyn_cast<SCEVAddRecExpr>(Ev); + const SCEVConstant *Step = + dyn_cast<SCEVConstant>(AddRecEv->getStepRecurrence(*SE)); + const SCEV *Base = AddRecEv->getStart(); + + // It is safe to truncate to base since if base is narrower than size_t + // the equivalent user code will have to truncate anyways. + const SCEV *NewEv = SE->getAddExpr( + Base, SE->getMulExpr(Step, SE->getTruncateOrSignExtend( + StrlenEv, Base->getType()))); + + Value *MaterializedPHI = Expander.expandCodeFor(NewEv, NewEv->getType(), + Builder.GetInsertPoint()); + Expander.clear(); + PN.replaceAllUsesWith(MaterializedPHI); + Cleanup.push_back(&PN); + } + + // All LCSSA Loop Phi are dead, the left over dead loop body can be cleaned + // up by later passes + for (PHINode *PN : Cleanup) + RecursivelyDeleteDeadPHINode(PN); + SE->forgetLoop(CurLoop); + + ++NumStrLen; + LLVM_DEBUG(dbgs() << " Formed strlen idiom: " << *StrLenFunc << "\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "recognizeAndInsertStrLen", + CurLoop->getStartLoc(), Preheader) + << "Transformed " << StrLenFunc->getName() << " loop idiom"; + }); + + return true; +} + /// Check if the given conditional branch is based on an unsigned less-than /// comparison between a variable and a constant, and if the comparison is false /// the control yields to the loop entry. If the branch matches the behaviour, diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 2301be6..24eefc9 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1582,6 +1582,15 @@ Value *llvm::emitStrLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, return emitLibCall(LibFunc_strlen, SizeTTy, CharPtrTy, Ptr, B, TLI); } +Value *llvm::emitWcsLen(Value *Ptr, IRBuilderBase &B, const DataLayout &DL, + const TargetLibraryInfo *TLI) { + assert(Ptr && Ptr->getType()->isPointerTy() && + "Argument to wcslen intrinsic must be a pointer."); + Type *PtrTy = B.getPtrTy(); + Type *SizeTTy = getSizeTTy(B, TLI); + return emitLibCall(LibFunc_wcslen, SizeTTy, PtrTy, Ptr, B, TLI); +} + Value *llvm::emitStrDup(Value *Ptr, IRBuilderBase &B, const TargetLibraryInfo *TLI) { Type *CharPtrTy = B.getPtrTy(); |