aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-07-23 12:39:08 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-07-23 12:39:08 +0000
commit743d45ee2536d80ecadcc78ba5fc042de3bb3749 (patch)
treed8a00e8c0dc3f0c62ee7a3be723ff6d1e4b5275f /llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parent7c35db08655182021c9ba9c7171e625eaa7e50e2 (diff)
downloadllvm-743d45ee2536d80ecadcc78ba5fc042de3bb3749.zip
llvm-743d45ee2536d80ecadcc78ba5fc042de3bb3749.tar.gz
llvm-743d45ee2536d80ecadcc78ba5fc042de3bb3749.tar.bz2
[TargetLowering] Add SimplifyMultipleUseDemandedBits
This patch introduces the DAG version of SimplifyMultipleUseDemandedBits, which attempts to peek through ops (mainly and/or/xor so far) that don't contribute to the demandedbits/elts of a node - which means we can do this even in cases where we have multiple uses of an op, which normally requires us to demanded all bits/elts. The intention is to remove a similar instruction - SelectionDAG::GetDemandedBits - once SimplifyMultipleUseDemandedBits has matured. The InstCombine version of SimplifyMultipleUseDemandedBits can constant fold which I haven't added here yet, and so far I've only wired this up to some basic binops (and/or/xor/add/sub/mul) to demonstrate its use. We do see a couple of regressions that need to be addressed: AMDGPU unsigned dot product codegen retains an AND mask (for ZERO_EXTEND) that it previously removed (but otherwise the dotproduct codegen is a lot better). X86/AVX2 has poor handling of vector ANY_EXTEND/ANY_EXTEND_VECTOR_INREG - it prematurely gets converted to ZERO_EXTEND_VECTOR_INREG. The code owners have confirmed its ok for these cases to fixed up in future patches. Differential Revision: https://reviews.llvm.org/D63281 llvm-svn: 366799
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp129
1 files changed, 128 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bab99b2..0b28fbb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -564,6 +564,61 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
AssumeSingleUse);
}
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? BITCAST? Constant?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
+
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -834,6 +889,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -869,6 +938,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
@@ -901,6 +984,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1663,6 +1760,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
@@ -1671,7 +1769,6 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
- SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -1684,6 +1781,23 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
@@ -2357,6 +2471,19 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}