author     Craig Topper <craig.topper@sifive.com>  2023-08-23 20:26:20 -0700
committer  Craig Topper <craig.topper@sifive.com>  2023-08-23 20:26:23 -0700
commit     2ad50f354a2dbd7a1c0ab0ab15723ed48d4a5b7b (patch)
tree       37e457477cc1c6b342ffb5787f2e840776128a77 /llvm
parent     16ccba51072bbc5ff4c66f91f939163dc91e5d96 (diff)
[DAGCombiner][RISCV][AArch64][PowerPC] Restrict foldAndOrOfSETCC from using SMIN/SMAX where an OR/AND would do.
This removes some diffs created by D153502. I'm assuming an AND/OR won't be worse than an SMIN/SMAX; for RISC-V at least, AND/OR can have a shorter encoding than SMIN/SMAX.

It's weird that we have two different functions responsible for folding logic of setccs, but I'm not ready to try to untangle that.

I'm unclear whether the PowerPC change is a regression or not. It looks like it might use more registers, but I don't understand PowerPC registers well enough to be sure.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D158292
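The restriction rests on a bit-level identity: for a sign-bit test, a plain OR/AND of the operands answers the same question, so the SMIN/SMAX form buys nothing. A minimal standalone C++ check of that identity over all 8-bit value pairs (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a) {
    for (int b = -128; b <= 127; ++b) {
      int8_t x = (int8_t)a, y = (int8_t)b;
      // or (setlt x, 0), (setlt y, 0)    ->  setlt (or x, y), 0
      assert(((x < 0) || (y < 0)) == ((int8_t)(x | y) < 0));
      // and (setlt x, 0), (setlt y, 0)   ->  setlt (and x, y), 0
      assert(((x < 0) && (y < 0)) == ((int8_t)(x & y) < 0));
      // and (setgt x, -1), (setgt y, -1) ->  setgt (or x, y), -1
      assert(((x > -1) && (y > -1)) == ((int8_t)(x | y) > -1));
      // or (setgt x, -1), (setgt y, -1)  ->  setgt (and x, y), -1
      assert(((x > -1) || (y > -1)) == ((int8_t)(x & y) > -1));
    }
  }
  return 0;
}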
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  |  7
-rw-r--r--  llvm/test/CodeGen/AArch64/vecreduce-bool.ll    |  4
-rw-r--r--  llvm/test/CodeGen/PowerPC/setcc-logic.ll       | 24
-rw-r--r--  llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll     |  4
4 files changed, 23 insertions, 16 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6407afe..70ba860 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6118,6 +6118,13 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
}
}
+ // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
+ // handle it using OR/AND.
+ if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
+ else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
+ CC = ISD::SETCC_INVALID;
+
if (CC != ISD::SETCC_INVALID) {
unsigned NewOpcode;
bool IsSigned = isSignedIntSetCC(CC);
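For these patterns the SMIN/SMAX lowering that the new guard skips is equally correct, just (presumably) no cheaper than the OR/AND that foldLogicOfSetCCs produces. A scalar model of the two lowerings, again an illustrative sketch rather than DAG code:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a <= 127; ++a) {
    for (int b = -128; b <= 127; ++b) {
      int8_t x = (int8_t)a, y = (int8_t)b;
      // "any sign bit set": setlt (smin x, y), 0 == setlt (or x, y), 0
      assert((std::min(x, y) < 0) == ((int8_t)(x | y) < 0));
      // "all sign bits set": setlt (smax x, y), 0 == setlt (and x, y), 0
      assert((std::max(x, y) < 0) == ((int8_t)(x & y) < 0));
    }
  }
  return 0;
}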
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
index d75daa5..58020d2 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-bool.ll
@@ -96,7 +96,7 @@ define i32 @reduce_and_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
define i32 @reduce_and_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v32:
; CHECK: // %bb.0:
-; CHECK-NEXT: smax v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: uminv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
@@ -190,7 +190,7 @@ define i32 @reduce_or_v16(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
define i32 @reduce_or_v32(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v32:
; CHECK: // %bb.0:
-; CHECK-NEXT: smin v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
index e3ff0ff..7dca471 100644
--- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll
+++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll
@@ -325,9 +325,9 @@ define <4 x i1> @all_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
define <4 x i1> @all_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_clear_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vminsw 2, 2, 3
-; CHECK-NEXT: xxleqv 35, 35, 35
-; CHECK-NEXT: vcmpgtsw 2, 2, 3
+; CHECK-NEXT: xxleqv 36, 36, 36
+; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -351,9 +351,9 @@ define <4 x i1> @all_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
define <4 x i1> @all_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: all_sign_bits_set_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmaxsw 2, 2, 3
-; CHECK-NEXT: xxlxor 35, 35, 35
-; CHECK-NEXT: vcmpgtsw 2, 3, 2
+; CHECK-NEXT: xxlxor 36, 36, 36
+; CHECK-NEXT: xxland 34, 34, 35
+; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -378,9 +378,9 @@ define <4 x i1> @any_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
define <4 x i1> @any_sign_bits_set_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_set_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vminsw 2, 2, 3
-; CHECK-NEXT: xxlxor 35, 35, 35
-; CHECK-NEXT: vcmpgtsw 2, 3, 2
+; CHECK-NEXT: xxlxor 36, 36, 36
+; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vcmpgtsw 2, 4, 2
; CHECK-NEXT: blr
%a = icmp slt <4 x i32> %P, zeroinitializer
%b = icmp slt <4 x i32> %Q, zeroinitializer
@@ -405,9 +405,9 @@ define <4 x i1> @any_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
define <4 x i1> @any_sign_bits_clear_vec(<4 x i32> %P, <4 x i32> %Q) {
; CHECK-LABEL: any_sign_bits_clear_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmaxsw 2, 2, 3
-; CHECK-NEXT: xxleqv 35, 35, 35
-; CHECK-NEXT: vcmpgtsw 2, 2, 3
+; CHECK-NEXT: xxleqv 36, 36, 36
+; CHECK-NEXT: xxland 34, 34, 35
+; CHECK-NEXT: vcmpgtsw 2, 2, 4
; CHECK-NEXT: blr
%a = icmp sgt <4 x i32> %P, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = icmp sgt <4 x i32> %Q, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
index 8910c76..b94c50d 100644
--- a/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
+++ b/llvm/test/CodeGen/RISCV/zbb-cmp-combine.ll
@@ -223,7 +223,7 @@ define i1 @flo(float %c, float %a, float %b) {
; CHECK-RV64I-NEXT: mv a0, s0
; CHECK-RV64I-NEXT: mv a1, s1
; CHECK-RV64I-NEXT: call __gesf2@plt
-; CHECK-RV64I-NEXT: min a0, s2, a0
+; CHECK-RV64I-NEXT: or a0, s2, a0
; CHECK-RV64I-NEXT: slti a0, a0, 0
; CHECK-RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -269,7 +269,7 @@ define i1 @dlo(double %c, double %a, double %b) {
; CHECK-NEXT: mv a0, s0
; CHECK-NEXT: mv a1, s1
; CHECK-NEXT: call __gedf2@plt
-; CHECK-NEXT: min a0, s2, a0
+; CHECK-NEXT: or a0, s2, a0
; CHECK-NEXT: slti a0, a0, 0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload