diff options
author | Sanjay Patel <spatel@rotateright.com> | 2021-03-02 13:50:22 -0500 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2021-03-02 14:29:15 -0500 |
commit | 415c67ba4ce58b5ab29fa17f033b944e420e62bc (patch) | |
tree | 02e524090369e83b932f29b4e3824fd59ebbd6ac | |
parent | 1b5ab13541319f1ec6fbda10d81048fba35c3abd (diff) | |
download | llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.zip llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.tar.gz llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.tar.bz2 |
[SDAG] allow partial undef vector constants with select->logic folds
This is an enhancement suggested in the original review/commit:
D97730 / 7fce3322a283
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/select-with-and-or.ll | 15 |
4 files changed, 14 insertions, 21 deletions
```diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 7adbc5d..4a8dd95 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1677,12 +1677,12 @@ bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
 /// Return true if the value is a constant 1 integer or a splatted vector of a
 /// constant 1 integer (with no undefs).
 /// Does not permit build vector implicit truncation.
-bool isOneOrOneSplat(SDValue V);
+bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
 
 /// Return true if the value is a constant -1 integer or a splatted vector of a
 /// constant -1 integer (with no undefs).
 /// Does not permit build vector implicit truncation.
-bool isAllOnesOrAllOnesSplat(SDValue V);
+bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
 
 class GlobalAddressSDNode : public SDNode {
   friend class SelectionDAG;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 39003d6..caee795 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9312,22 +9312,22 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
 
   // select Cond, Cond, F --> or Cond, F
   // select Cond, 1, F --> or Cond, F
-  if (Cond == T || isOneOrOneSplat(T))
+  if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
 
   // select Cond, T, Cond --> and Cond, T
   // select Cond, T, 0 --> and Cond, T
-  if (Cond == F || isNullOrNullSplat(F))
+  if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
 
   // select Cond, T, 1 --> or (not Cond), T
-  if (isOneOrOneSplat(F)) {
+  if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
   }
 
   // select Cond, 0, F --> and (not Cond), F
-  if (isNullOrNullSplat(T)) {
+  if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0c592b2..2b74aae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9253,17 +9253,17 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
   return C && C->isNullValue();
 }
 
-bool llvm::isOneOrOneSplat(SDValue N) {
+bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
   // TODO: may want to use peekThroughBitcast() here.
   unsigned BitWidth = N.getScalarValueSizeInBits();
-  ConstantSDNode *C = isConstOrConstSplat(N);
+  ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
   return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
 }
 
-bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
   N = peekThroughBitcasts(N);
   unsigned BitWidth = N.getScalarValueSizeInBits();
-  ConstantSDNode *C = isConstOrConstSplat(N);
+  ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
   return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index a6e1825..94e6860d 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -122,7 +122,7 @@ define <4 x i1> @and_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i3
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: xtn v0.4h, v0.4s
 ; CHECK-NEXT: ret
   %a = icmp eq <4 x i32> %x, %y
@@ -136,10 +136,8 @@ define <4 x i1> @or_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b
 ; CHECK-NEXT: ret
   %a = icmp eq <4 x i32> %x, %y
   %b = icmp sgt <4 x i32> %z, %w
@@ -152,9 +150,8 @@ define <4 x i1> @and_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
 ; CHECK-NEXT: ret
   %a = icmp eq <4 x i32> %x, %y
   %b = icmp sgt <4 x i32> %z, %w
@@ -167,12 +164,8 @@ define <4 x i1> @or_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: xtn v3.4h, v0.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: ret
   %a = icmp eq <4 x i32> %x, %y
   %b = icmp sgt <4 x i32> %z, %w
```