aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sanjay Patel <spatel@rotateright.com> 2021-03-02 13:50:22 -0500
committer Sanjay Patel <spatel@rotateright.com> 2021-03-02 14:29:15 -0500
commit 415c67ba4ce58b5ab29fa17f033b944e420e62bc (patch)
tree 02e524090369e83b932f29b4e3824fd59ebbd6ac
parent 1b5ab13541319f1ec6fbda10d81048fba35c3abd (diff)
download llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.zip
llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.tar.gz
llvm-415c67ba4ce58b5ab29fa17f033b944e420e62bc.tar.bz2
[SDAG] allow partial undef vector constants with select->logic folds
This is an enhancement suggested in the original review/commit: D97730 / 7fce3322a283
-rw-r--r-- llvm/include/llvm/CodeGen/SelectionDAGNodes.h 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 8
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 8
-rw-r--r-- llvm/test/CodeGen/AArch64/select-with-and-or.ll 15
4 files changed, 14 insertions, 21 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 7adbc5d..4a8dd95 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1677,12 +1677,12 @@ bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);
/// Return true if the value is a constant 1 integer or a splatted vector of a
/// constant 1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
-bool isOneOrOneSplat(SDValue V);
+bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);
/// Return true if the value is a constant -1 integer or a splatted vector of a
/// constant -1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
-bool isAllOnesOrAllOnesSplat(SDValue V);
+bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 39003d6..caee795 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9312,22 +9312,22 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
// select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F
- if (Cond == T || isOneOrOneSplat(T))
+ if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
// select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T
- if (Cond == F || isNullOrNullSplat(F))
+ if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
// select Cond, T, 1 --> or (not Cond), T
- if (isOneOrOneSplat(F)) {
+ if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
}
// select Cond, 0, F --> and (not Cond), F
- if (isNullOrNullSplat(T)) {
+ if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 0c592b2..2b74aae 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9253,17 +9253,17 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
return C && C->isNullValue();
}
-bool llvm::isOneOrOneSplat(SDValue N) {
+bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcast() here.
unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isOne() && C->getValueSizeInBits(0) == BitWidth;
}
-bool llvm::isAllOnesOrAllOnesSplat(SDValue N) {
+bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) {
N = peekThroughBitcasts(N);
unsigned BitWidth = N.getScalarValueSizeInBits();
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth;
}
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index a6e1825..94e6860d 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -122,7 +122,7 @@ define <4 x i1> @and_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i3
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
@@ -136,10 +136,8 @@ define <4 x i1> @or_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -152,9 +150,8 @@ define <4 x i1> @and_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -167,12 +164,8 @@ define <4 x i1> @or_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: xtn v3.4h, v0.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w