diff options
author | Sanjay Patel <spatel@rotateright.com> | 2017-08-06 16:27:07 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2017-08-06 16:27:07 +0000 |
commit | a923c2ee95a4b3b6d43a850789ba56c6aa249b3c (patch) | |
tree | be69b92b4063e1ba19c60cf82a2611f084c21a2f /llvm/lib | |
parent | a9b5bbac789a69322ec62011bcb8e8462a097e59 (diff) | |
download | llvm-a923c2ee95a4b3b6d43a850789ba56c6aa249b3c.zip llvm-a923c2ee95a4b3b6d43a850789ba56c6aa249b3c.tar.gz llvm-a923c2ee95a4b3b6d43a850789ba56c6aa249b3c.tar.bz2 |
[x86] use more shift or LEA for select-of-constants
We can convert any select-of-constants to math ops:
http://rise4fun.com/Alive/d7d
For this patch, I'm enhancing an existing x86 transform that uses fake multiplies
(they always become shl/lea) to avoid cmov or branching. The current code misses
cases where we have a negative constant and a positive constant, so this is just
trying to plug that hole.
The DAGCombiner diff prevents us from hitting a terrible inefficiency: without it, we can start
with a select in IR, create a select DAG node, convert it into a sext, convert it
back into a select, and then lower it to sext machine code.
Some notes about the test diffs:
1. 2010-08-04-MaskedSignedCompare.ll - We were creating control flow that didn't exist in the IR.
2. memcmp.ll - The choose -1 or 1 case is what got me looking at this again. I
think we could avoid the push/pop in some cases if we used 'movzbl %al' instead of an xor on
a different reg. That's a post-DAG problem though.
3. mul-constant-result.ll - The trade-off between sbb+not and setne+neg could be addressed if
that's a regression, but I think those would always be nearly equivalent.
4. pr22338.ll and sext-i1.ll - These tests have undef operands, so I don't think we actually care about these diffs.
5. sbb.ll - This shows a win for what I think is a common case: choose -1 or 0.
6. select.ll - There's another borderline case here: cmp+sbb+or vs. test+set+lea? Also, sbb+not vs. setae+neg shows up again.
7. select_const.ll - These are motivating cases for the enhancement; replace cmov with cheaper ops.
Assembly differences between movzbl and xor to avoid a partial reg stall are caused later by the X86 Fixup SetCC pass.
Differential Revision: https://reviews.llvm.org/D35340
llvm-svn: 310208
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 95 |
2 files changed, 32 insertions, 65 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 937522e..353c86f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7394,7 +7394,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector()) { + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath()) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e1dd6d0..e60fcf5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -30060,78 +30060,45 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { return SDValue(); // Don't do this for crazy integer types. - if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) + EVT VT = N->getValueType(0); + if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) return SDValue(); - // If this is efficiently invertible, canonicalize the LHSC/RHSC values - // so that TrueC (the true value) is larger than FalseC. - bool NeedsCondInvert = false; - if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) && - // Efficiently invertible. - (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible. - (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible. - isa<ConstantSDNode>(Cond.getOperand(1))))) { - NeedsCondInvert = true; - std::swap(TrueC, FalseC); - } - - // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0. - if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { - if (NeedsCondInvert) // Invert the condition if needed. - Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, DL, Cond.getValueType())); - - // Zero extend the condition if needed. 
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond); - - unsigned ShAmt = TrueC->getAPIntValue().logBase2(); - return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond, - DAG.getConstant(ShAmt, DL, MVT::i8)); - } + // We're going to use the condition bit in math or logic ops. We could allow + // this with a wider condition value (post-legalization it becomes an i8), + // but if nothing is creating selects that late, it doesn't matter. + if (Cond.getValueType() != MVT::i1) + return SDValue(); - // Optimize cases that will turn into an LEA instruction. This requires - // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). - if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { - uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue(); - if (N->getValueType(0) == MVT::i32) - Diff = (unsigned)Diff; + // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply by + // 3, 5, or 9 with i32/i64, so those get transformed too. + // TODO: For constants that do not differ by power-of-2 or small multiplier, + // convert to 'and' + 'add'. + APInt AbsDiff = (TrueC->getAPIntValue() - FalseC->getAPIntValue()).abs(); + if (AbsDiff.isPowerOf2() || + ((VT == MVT::i32 || VT == MVT::i64) && + (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) { - bool IsFastMultiplier = false; - if (Diff < 10) { - switch ((unsigned char)Diff) { - default: - break; - case 1: // result = add base, cond - case 2: // result = lea base( , cond*2) - case 3: // result = lea base(cond, cond*2) - case 4: // result = lea base( , cond*4) - case 5: // result = lea base(cond, cond*4) - case 8: // result = lea base( , cond*8) - case 9: // result = lea base(cond, cond*8) - IsFastMultiplier = true; - break; - } + // We need a positive multiplier constant for shift/LEA codegen. The 'not' + // of the condition can usually be folded into a compare predicate, but even + // without that, the sequence should be cheaper than a CMOV alternative. 
+ if (TrueC->getAPIntValue().slt(FalseC->getAPIntValue())) { + Cond = DAG.getNOT(DL, Cond, MVT::i1); + std::swap(TrueC, FalseC); } - if (IsFastMultiplier) { - APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); - if (NeedsCondInvert) // Invert the condition if needed. - Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond, - DAG.getConstant(1, DL, Cond.getValueType())); + // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC + SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - // Zero extend the condition if needed. - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond); - // Scale the condition by the difference. - if (Diff != 1) - Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, - DAG.getConstant(Diff, DL, Cond.getValueType())); + // Multiply condition by the difference if non-one. + if (!AbsDiff.isOneValue()) + R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT)); - // Add the base if non-zero. - if (FalseC->getAPIntValue() != 0) - Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, - SDValue(FalseC, 0)); - return Cond; - } + // Add the base if non-zero. + if (!FalseC->isNullValue()) + R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0)); + + return R; } return SDValue(); |