diff options
author | Sotiris Apostolakis <apostolakis@google.com> | 2022-09-16 20:34:37 +0000 |
---|---|---|
committer | Sotiris Apostolakis <apostolakis@google.com> | 2022-09-16 20:50:46 +0000 |
commit | b827e7c6007476504af67290cda2935810dfcacd (patch) | |
tree | 7633c2dbaf2f563d00571744b39ce3ac19472cba /llvm/lib/CodeGen/SelectOptimize.cpp | |
parent | f5cbbb998826bc84cbebd12f890622b3ec3184fe (diff) | |
download | llvm-b827e7c6007476504af67290cda2935810dfcacd.zip llvm-b827e7c6007476504af67290cda2935810dfcacd.tar.gz llvm-b827e7c6007476504af67290cda2935810dfcacd.tar.bz2 |
[SelectOpti] Restrict load sinking
This is a follow-up to D133777, which resolved a use-after-free case but
did not cover all possible memory bugs due to misplacement of loads.
In short, the overall problem was that sunk loads could be moved past
state-modifying instructions, leading to memory bugs.
The solution is to restrict load sinking unless it is found to be sound.
i) Within a basic block (the to-be-sunk load and its select user are in the same BB),
loads can be sunk only if there is no intervening state-modifying instruction.
This is a conservative approach to avoid resorting to alias analysis to detect
potential memory overlap.
ii) Across basic blocks, sinking of loads is avoided. This is because going over
multiple basic blocks looking for memory conflicts could be computationally
expensive and also unlikely to allow loads to sink. Further, experiments showed
that not sinking these loads has a slight positive performance effect.
Maybe for some of these loads, having some separation allows enough time
for the load to be executed in time for its user. This is not the case for
floating point operations that benefit more from sinking.
This patch essentially undoes the solution in D133777,
since this patch is a complete solution to the observed problem.
Overall, the performance impact of this patch is minimal.
Tested on two internal Google workloads with instrPGO.
Search application showed <0.05% perf difference,
while the database one showed a slight improvement,
but not statistically significant.
Reviewed By: davidxl
Differential Revision: https://reviews.llvm.org/D133999
Diffstat (limited to 'llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectOptimize.cpp | 50 |
1 files changed, 28 insertions, 22 deletions
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp index ff1df22..3fdf1fc 100644 --- a/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/llvm/lib/CodeGen/SelectOptimize.cpp @@ -29,7 +29,6 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -182,7 +181,7 @@ private: // consisting of instructions exclusively computed for producing the operands // of the source instruction. void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking = false); + Instruction *SI, bool ForSinking = false); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -377,13 +376,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // false operands. if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) { std::stack<Instruction *> TrueSlice; - getExclBackwardsSlice(TI, TrueSlice, true); + getExclBackwardsSlice(TI, TrueSlice, SI, true); maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); TrueSlices.push_back(TrueSlice); } if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) { std::stack<Instruction *> FalseSlice; - getExclBackwardsSlice(FI, FalseSlice, true); + getExclBackwardsSlice(FI, FalseSlice, SI, true); maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); FalseSlices.push_back(FalseSlice); } @@ -470,22 +469,6 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); FalseBranch->setDebugLoc(SI->getDebugLoc()); } - // If there was sinking, move any lifetime-end intrinsic calls found in the - // StartBlock to the newly-created end block to ensure sound lifetime info - // after sinking (in case any use is sinked). 
- else { - SmallVector<Instruction *, 2> EndLifetimeCalls; - for (Instruction &I : *StartBlock) { - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) { - if (II->getIntrinsicID() == Intrinsic::lifetime_end) { - EndLifetimeCalls.push_back(&I); - } - } - } - for (auto *LC : EndLifetimeCalls) { - LC->moveBefore(&*EndBlock->getFirstInsertionPt()); - } - } // Insert the real conditional branch based on the original condition. // If we did not create a new block for one of the 'true' or 'false' paths @@ -700,7 +683,7 @@ bool SelectOptimize::hasExpensiveColdOperand( } if (ColdI) { std::stack<Instruction *> ColdSlice; - getExclBackwardsSlice(ColdI, ColdSlice); + getExclBackwardsSlice(ColdI, ColdSlice, SI); InstructionCost SliceCost = 0; while (!ColdSlice.empty()) { SliceCost += TTI->getInstructionCost(ColdSlice.top(), @@ -721,6 +704,22 @@ bool SelectOptimize::hasExpensiveColdOperand( return false; } +// Check if it is safe to move LoadI next to the SI. +// Conservatively assume it is safe only if there is no instruction +// modifying memory in-between the load and the select instruction. +static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { + // Assume loads from different basic blocks are unsafe to move. + if (LoadI->getParent() != SI->getParent()) + return false; + auto It = LoadI->getIterator(); + while (&*It != SI) { + if (It->mayWriteToMemory()) + return false; + It++; + } + return true; +} + // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for the purpose of producing // the operands of the source instruction. As an approximation @@ -729,7 +728,7 @@ bool SelectOptimize::hasExpensiveColdOperand( // form an one-use chain that leads to the source instruction. 
void SelectOptimize::getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking) { + Instruction *SI, bool ForSinking) { SmallPtrSet<Instruction *, 2> Visited; std::queue<Instruction *> Worklist; Worklist.push(I); @@ -751,6 +750,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, isa<SelectInst>(II) || isa<PHINode>(II))) continue; + // Avoid sinking loads in order not to skip state-modifying instructions, + // that may alias with the loaded address. + // Only allow sinking of loads within the same basic block that are + // conservatively proven to be safe. + if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) + continue; + // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) |