author     Max Kazantsev <mkazantsev@azul.com>  2021-03-04 12:01:39 +0700
committer  Max Kazantsev <mkazantsev@azul.com>  2021-03-04 15:22:55 +0700
commit     d9e93e8e57fe63babc319cbaf84f1afeccb83696 (patch)
tree       91f17200de741b69e2815310c356d78c5d35433a /llvm/lib/CodeGen/CodeGenPrepare.cpp
parent     b15ce2f344ac7845729d2be0a8316b20a32c6292 (diff)
[X86][CodeGenPrepare] Try to reuse IV's incremented value instead of adding the offset, part 1
While optimizing a memory instruction, we sometimes need to add an offset to the value of `IV`. We can avoid doing so if `IV.next` is already defined at the point of interest. In this case, we may get two advantages from the reuse:

- If the `IV` step happens to match the offset, we do not need to add the offset at all;
- We reduce the overlap of the live ranges of `IV` and `IV.next`. They may stop overlapping, which leads to better register allocation. Even if the overlap remains, we are not introducing a new one, so it should be a neutral transform (this part is disabled in this patch and will be enabled in a follow-up).

Currently I've only added support for IVs that get decremented using the `usub` intrinsic. We could also support `AddInstr`, but there is a weird interaction with some other transform that may lead to infinite compilation in that case (it seems the same transform is done and undone over and over). I need to investigate why that happens, but generally we could do that too.

This first part only handles the case where the reuse fully eliminates the offset.

Differential Revision: https://reviews.llvm.org/D96399

Reviewed By: reames
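To make the intent concrete, here is a hand-written IR sketch of the kind of loop this targets (illustrative only; names and shapes are hypothetical, not taken from the patch's tests). The load addresses p[iv - 1], i.e. base %p, scale 4 on %iv, displacement -4. Since %iv.next = %iv - 1 is already computed by the usub intrinsic before the load, the matcher can select address = %p + 4 * %iv.next and drop the displacement entirely:

loop:
  %iv = phi i64 [ %len, %entry ], [ %iv.next, %backedge ]
  %math = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %iv, i64 1)
  %iv.next = extractvalue { i64, i1 } %math, 0
  %ov = extractvalue { i64, i1 } %math, 1
  br i1 %ov, label %exit, label %backedge

backedge:
  %p.dec = getelementptr i32, i32* %p, i64 -1      ; base = p - 4
  %addr = getelementptr i32, i32* %p.dec, i64 %iv  ; p - 4 + 4 * %iv
  %v = load i32, i32* %addr
  br label %loop

Here Scale * Step = 4 * (-1) matches the displacement -4, and %iv.next dominates the load, so the rewritten addressing mode needs no offset at all.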
Diffstat (limited to 'llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  llvm/lib/CodeGen/CodeGenPrepare.cpp | 100
1 file changed, 80 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d87d500..ecf14b1 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -3064,6 +3064,8 @@ class AddressingModeMatcher {
const TargetLowering &TLI;
const TargetRegisterInfo &TRI;
const DataLayout &DL;
+ const LoopInfo &LI;
+ const DominatorTree &DT;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
@@ -3099,16 +3101,17 @@ class AddressingModeMatcher {
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
- const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
+ const TargetRegisterInfo &TRI, const LoopInfo &LI,
+ const DominatorTree &DT, Type *AT, unsigned AS, Instruction *MI,
ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
- DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
- MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
- OptSize(OptSize), PSI(PSI), BFI(BFI) {
+ DL(MI->getModule()->getDataLayout()), LI(LI), DT(DT), AccessTy(AT),
+ AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
+ InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
+ LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -3123,18 +3126,17 @@ public:
static ExtAddrMode
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
SmallVectorImpl<Instruction *> &AddrModeInsts,
- const TargetLowering &TLI, const TargetRegisterInfo &TRI,
- const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT,
+ const TargetLowering &TLI, const LoopInfo &LI, const DominatorTree &DT,
+ const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
- MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT, LargeOffsetGEP,
- OptSize, PSI, BFI)
- .matchAddr(V, 0);
+ bool Success = AddressingModeMatcher(
+ AddrModeInsts, TLI, TRI, LI, DT, AccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
@@ -3830,10 +3832,12 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
+ // X*Scale + C*Scale to addr mode. If we find an available IV increment, do
+ // not go any further: we can reuse it and cannot eliminate it.
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
+ !isIVIncrement(cast<BinaryOperator>(ScaleReg), &LI) &&
CI->getValue().isSignedIntN(64)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
@@ -3846,9 +3850,63 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
AddrMode = TestAddrMode;
return true;
}
+ // Restore status quo.
+ TestAddrMode = AddrMode;
}
- // Otherwise, not (x+c)*scale, just return what we have.
+ auto GetConstantStep =
+     [this](const Value *V) -> Optional<std::pair<Instruction *, APInt>> {
+ auto *PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return None;
+ auto IVInc = getIVIncrement(PN, &LI);
+ if (!IVInc)
+ return None;
+ // TODO: The result of the intrinsics above is two's complement. However, when
+ // IV inc is expressed as add or sub, iv.next is potentially a poison value.
+ // If it has nuw or nsw flags, we need to make sure that these flags are
+ // inferrable at the point of the memory instruction. Otherwise we are
+ // replacing a well-defined two's complement computation with poison.
+ // Currently, to avoid the potentially complex analysis needed to prove this,
+ // we reject such cases.
+ if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
+ if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
+ return None;
+ if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
+ return std::make_pair(IVInc->first, ConstantStep->getValue());
+ return None;
+ };
+
+ // Try to account for the following special case:
+ // 1. ScaleReg is an inductive variable;
+ // 2. We use it with non-zero offset;
+ // 3. IV's increment is available at the point of memory instruction.
+ //
+ // In this case, we may reuse the IV increment instead of the IV Phi to
+ // achieve the following advantages:
+ // 1. If the IV step matches the offset, we will not need the offset at all.
+ if (AddrMode.BaseOffs) {
+ if (auto IVStep = GetConstantStep(ScaleReg)) {
+ Instruction *IVInc = IVStep->first;
+ APInt Step = IVStep->second;
+ APInt Offset = Step * AddrMode.Scale;
+ if (Offset.isSignedIntN(64) && TestAddrMode.BaseOffs == Offset &&
+ DT.dominates(IVInc, MemoryInst)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = IVInc;
+ TestAddrMode.BaseOffs -= Offset.getLimitedValue();
+ // If this addressing mode is legal, commit it.
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
+ AddrModeInsts.push_back(cast<Instruction>(IVInc));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+ }
+ }
+
+ // Otherwise, just return what we have.
return true;
}
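A note on the TODO inside GetConstantStep above: a minimal hypothetical fragment (again, not from the patch's tests) shows why flagged increments must be rejected. Below, %sum computes the same value as %iv.next, but %iv.next carries nsw and is therefore poison on the overflowing iteration; substituting it into the address would replace a well-defined two's complement computation with poison:

loop:
  %iv = phi i8 [ 126, %entry ], [ %iv.next, %loop ]
  %iv.next = add nsw i8 %iv, 1              ; poison once %iv hits 127
  %sum = add i8 %iv, 1                      ; same value, well defined (wraps)
  %addr = getelementptr i8, i8* %p, i8 %sum
  %v = load i8, i8* %addr
  ...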
@@ -4938,9 +4996,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
0);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(
- MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, DT,
+ AddressAccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -5043,9 +5102,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrModeInsts.clear();
std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
0);
+ Function *F = MemoryInst->getParent()->getParent();
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDT(*F),
+ *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;