diff options
author | James Molloy <james.molloy@arm.com> | 2016-08-01 07:45:11 +0000 |
---|---|---|
committer | James Molloy <james.molloy@arm.com> | 2016-08-01 07:45:11 +0000 |
commit | b2e436de423aac1de4d764166317dd7ab9cbf751 (patch) | |
tree | 091b83177e1acf38e066b816a525bbe2b9b599cc /llvm/lib/Transforms/Utils/SimplifyCFG.cpp | |
parent | 9f0546b5a9bf407b46ac96cd76a387a9bdcf28cb (diff) | |
download | llvm-b2e436de423aac1de4d764166317dd7ab9cbf751.zip llvm-b2e436de423aac1de4d764166317dd7ab9cbf751.tar.gz llvm-b2e436de423aac1de4d764166317dd7ab9cbf751.tar.bz2 |
[SimplifyCFG] Range reduce switches
If a switch is sparse and all the cases (once sorted) are in arithmetic progression, we can extract the common factor out of the switch and create a dense switch. For example:
switch (i) {
case 5: ...
case 9: ...
case 13: ...
case 17: ...
}
can become:
if ( (i - 5) % 4 ) goto default;
switch ((i - 5) / 4) {
case 0: ...
case 1: ...
case 2: ...
case 3: ...
}
or even better:
switch ( ROTR(i - 5, 2) {
case 0: ...
case 1: ...
case 2: ...
case 3: ...
}
The division and remainder operations could be costly so we only do this if the factor is a power of two, and emit a right-rotate instead of a divide/remainder sequence. Dense switches can be lowered significantly better than sparse switches and can even be transformed into lookup tables.
llvm-svn: 277325
Diffstat (limited to 'llvm/lib/Transforms/Utils/SimplifyCFG.cpp')
-rw-r--r-- | llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 0504646..f4d7f5e 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5038,6 +5038,109 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder, return true; } +static bool isSwitchDense(ArrayRef<int64_t> Values) { + // See also SelectionDAGBuilder::isDense(), which this function was based on. + uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); + uint64_t Range = Diff + 1; + uint64_t NumCases = Values.size(); + // 40% is the default density for building a jump table in optsize/minsize mode. + uint64_t MinDensity = 40; + + return NumCases * 100 >= Range * MinDensity; +} + +// Try and transform a switch that has "holes" in it to a contiguous sequence +// of cases. +// +// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be +// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. +// +// This converts a sparse switch into a dense switch which allows better +// lowering and could also allow transforming into a lookup table. +static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, + const DataLayout &DL, + const TargetTransformInfo &TTI) { + auto *CondTy = cast<IntegerType>(SI->getCondition()->getType()); + if (CondTy->getIntegerBitWidth() > 64 || + !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth())) + return false; + // Only bother with this optimization if there are more than 3 switch cases; + // SDAG will only bother creating jump tables for 4 or more cases. + if (SI->getNumCases() < 4) + return false; + + // This transform is agnostic to the signedness of the input or case values. We + // can treat the case values as signed or unsigned. We can optimize more common + // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values + // as signed. + SmallVector<int64_t,4> Values; + for (auto &C : SI->cases()) + Values.push_back(C.getCaseValue()->getValue().getSExtValue()); + std::sort(Values.begin(), Values.end()); + + // If the switch is already dense, there's nothing useful to do here. + if (isSwitchDense(Values)) + return false; + + // First, transform the values such that they start at zero and ascend. + int64_t Base = Values[0]; + for (auto &V : Values) + V -= Base; + + // Now we have signed numbers that have been shifted so that, given enough + // precision, there are no negative values. Since the rest of the transform + // is bitwise only, we switch now to an unsigned representation. + uint64_t GCD = 0; + for (auto &V : Values) + GCD = llvm::GreatestCommonDivisor64(GCD, (uint64_t)V); + + // This transform can be done speculatively because it is so cheap - it results + // in a single rotate operation being inserted. This can only happen if the + // factor extracted is a power of 2. + // FIXME: If the GCD is an odd number we can multiply by the multiplicative + // inverse of GCD and then perform this transform. + // FIXME: It's possible that optimizing a switch on powers of two might also + // be beneficial - flag values are often powers of two and we could use a CLZ + // as the key function. + if (GCD <= 1 || !llvm::isPowerOf2_64(GCD)) + // No common divisor found or too expensive to compute key function. + return false; + + unsigned Shift = llvm::Log2_64(GCD); + for (auto &V : Values) + V = (int64_t)((uint64_t)V >> Shift); + + if (!isSwitchDense(Values)) + // Transform didn't create a dense switch. + return false; + + // The obvious transform is to shift the switch condition right and emit a + // check that the condition actually cleanly divided by GCD, i.e. + // C & (1 << Shift - 1) == 0 + // inserting a new CFG edge to handle the case where it didn't divide cleanly. + // + // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the + // shift and puts the shifted-off bits in the uppermost bits. If any of these + // are nonzero then the switch condition will be very large and will hit the + // default case. + + auto *Ty = cast<IntegerType>(SI->getCondition()->getType()); + Builder.SetInsertPoint(SI); + auto *ShiftC = ConstantInt::get(Ty, Shift); + auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base)); + auto *Rot = Builder.CreateOr(Builder.CreateLShr(Sub, ShiftC), + Builder.CreateShl(Sub, Ty->getBitWidth() - Shift)); + SI->replaceUsesOfWith(SI->getCondition(), Rot); + + for (auto &C : SI->cases()) { + auto *Orig = C.getCaseValue(); + auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base); + SI->replaceUsesOfWith(Orig, + ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))); + } + return true; +} + bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { BasicBlock *BB = SI->getParent(); @@ -5081,6 +5184,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) { if (SwitchToLookupTable(SI, Builder, DL, TTI)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + if (ReduceSwitchRange(SI, Builder, DL, TTI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; } |