about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: QingShan Zhang <qshanz@cn.ibm.com> 2020-01-09 03:41:36 +0000
committer: QingShan Zhang <qshanz@cn.ibm.com> 2020-01-09 04:33:46 +0000
commit: d48ac7d54d8a096677c84cfb2928400e05b918ea (patch)
tree: d9d9fde71e0263cb6226b22a28a51bde5057d776
parent: de3d0ee023cb14c06d5be01369ef8db4cbfa16b4 (diff)
download: llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.zip
download: llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.tar.gz
download: llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.tar.bz2
[DAGCombine] Fold the (fma -x, y, -z) to -(fma x, y, z)
This combine is profitable as long as FNEG is not free on the target, because it reduces the number of negations from two to one.
Differential Revision: https://reviews.llvm.org/D72312
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9
-rw-r--r-- llvm/test/CodeGen/PowerPC/combine-fneg.ll | 11
-rw-r--r-- llvm/test/CodeGen/PowerPC/fma-combine.ll | 42
-rw-r--r-- llvm/test/CodeGen/PowerPC/qpx-recipest.ll | 4
-rw-r--r-- llvm/test/CodeGen/PowerPC/recipest.ll | 4
-rw-r--r-- llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll | 4
6 files changed, 37 insertions, 37 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cfc4671..37b1b17 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12639,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
}
}
+ // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
+ // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
+ if (!TLI.isFNegFree(VT) &&
+ TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations,
+ ForCodeSize) == 2)
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ TLI.getNegatedExpression(SDValue(N, 0), DAG,
+ LegalOperations, ForCodeSize),
+ Flags);
return SDValue();
}
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 1d85f4f..14bace2 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -5,19 +5,16 @@
define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
; CHECK-LABEL: fneg_fdiv_splat:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT: xxspltd 0, 1, 0
-; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l
; CHECK-NEXT: lxvd2x 1, 0, 3
-; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha
; CHECK-NEXT: xvredp 2, 0
-; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l
-; CHECK-NEXT: xxswapd 1, 1
-; CHECK-NEXT: xvnmsubadp 1, 0, 2
-; CHECK-NEXT: xvmaddadp 2, 2, 1
-; CHECK-NEXT: lxvd2x 1, 0, 3
; CHECK-NEXT: xxswapd 1, 1
+; CHECK-NEXT: xxlor 3, 1, 1
+; CHECK-NEXT: xvmaddadp 3, 0, 2
+; CHECK-NEXT: xvnmsubadp 2, 2, 3
; CHECK-NEXT: xvmaddadp 1, 0, 2
; CHECK-NEXT: xvmsubadp 2, 2, 1
; CHECK-NEXT: xvmuldp 34, 34, 2
diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll
index 5baf663..88da295 100644
--- a/llvm/test/CodeGen/PowerPC/fma-combine.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll
@@ -8,14 +8,12 @@
define double @fma_combine1(double %a, double %b, double %c) {
; CHECK-FAST-LABEL: fma_combine1:
; CHECK-FAST: # %bb.0: # %entry
-; CHECK-FAST-NEXT: xsnegdp 0, 3
-; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2
+; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2
; CHECK-FAST-NEXT: blr
;
; CHECK-FAST-NOVSX-LABEL: fma_combine1:
; CHECK-FAST-NOVSX: # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT: fneg 0, 3
-; CHECK-FAST-NOVSX-NEXT: fmsub 1, 0, 2, 1
+; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 3, 2, 1
; CHECK-FAST-NOVSX-NEXT: blr
;
; CHECK-LABEL: fma_combine1:
@@ -34,14 +32,12 @@ entry:
define double @fma_combine2(double %a, double %b, double %c) {
; CHECK-FAST-LABEL: fma_combine2:
; CHECK-FAST: # %bb.0: # %entry
-; CHECK-FAST-NEXT: xsnegdp 0, 3
-; CHECK-FAST-NEXT: xsmsubadp 1, 2, 0
+; CHECK-FAST-NEXT: xsnmaddadp 1, 2, 3
; CHECK-FAST-NEXT: blr
;
; CHECK-FAST-NOVSX-LABEL: fma_combine2:
; CHECK-FAST-NOVSX: # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT: fneg 0, 3
-; CHECK-FAST-NOVSX-NEXT: fmsub 1, 2, 0, 1
+; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 2, 3, 1
; CHECK-FAST-NOVSX-NEXT: blr
;
; CHECK-LABEL: fma_combine2:
@@ -62,25 +58,25 @@ entry:
define double @fma_combine_two_uses(double %a, double %b, double %c) {
; CHECK-FAST-LABEL: fma_combine_two_uses:
; CHECK-FAST: # %bb.0: # %entry
-; CHECK-FAST-NEXT: xsnegdp 0, 3
+; CHECK-FAST-NEXT: xsnegdp 0, 1
; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha
; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha
-; CHECK-FAST-NEXT: xsnegdp 3, 1
-; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2
-; CHECK-FAST-NEXT: stfd 0, z@toc@l(4)
-; CHECK-FAST-NEXT: stfd 3, v@toc@l(3)
+; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2
+; CHECK-FAST-NEXT: xsnegdp 2, 3
+; CHECK-FAST-NEXT: stfd 0, v@toc@l(3)
+; CHECK-FAST-NEXT: stfd 2, z@toc@l(4)
; CHECK-FAST-NEXT: blr
;
; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses:
; CHECK-FAST-NOVSX: # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT: fneg 3, 3
+; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1
+; CHECK-FAST-NOVSX-NEXT: fneg 2, 1
; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha
; CHECK-FAST-NOVSX-NEXT: addis 4, 2, z@toc@ha
-; CHECK-FAST-NOVSX-NEXT: fmsub 0, 3, 2, 1
-; CHECK-FAST-NOVSX-NEXT: fneg 2, 1
-; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4)
+; CHECK-FAST-NOVSX-NEXT: fneg 3, 3
; CHECK-FAST-NOVSX-NEXT: fmr 1, 0
; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3)
+; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4)
; CHECK-FAST-NOVSX-NEXT: blr
;
; CHECK-LABEL: fma_combine_two_uses:
@@ -108,19 +104,17 @@ entry:
define double @fma_combine_one_use(double %a, double %b, double %c) {
; CHECK-FAST-LABEL: fma_combine_one_use:
; CHECK-FAST: # %bb.0: # %entry
-; CHECK-FAST-NEXT: xsnegdp 0, 3
+; CHECK-FAST-NEXT: xsnegdp 0, 1
; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha
-; CHECK-FAST-NEXT: xsnegdp 3, 1
-; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2
-; CHECK-FAST-NEXT: stfd 3, v@toc@l(3)
+; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2
+; CHECK-FAST-NEXT: stfd 0, v@toc@l(3)
; CHECK-FAST-NEXT: blr
;
; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use:
; CHECK-FAST-NOVSX: # %bb.0: # %entry
-; CHECK-FAST-NOVSX-NEXT: fneg 0, 3
-; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha
-; CHECK-FAST-NOVSX-NEXT: fmsub 0, 0, 2, 1
+; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1
; CHECK-FAST-NOVSX-NEXT: fneg 2, 1
+; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha
; CHECK-FAST-NOVSX-NEXT: fmr 1, 0
; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3)
; CHECK-FAST-NOVSX-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
index 3bfd92a..246bec1 100644
--- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll
@@ -229,8 +229,8 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind {
; CHECK-NEXT: qvfre 3, 2
; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l
; CHECK-NEXT: qvlfdx 0, 0, 3
-; CHECK-NEXT: qvfnmsub 0, 2, 3, 0
-; CHECK-NEXT: qvfmadd 0, 3, 0, 3
+; CHECK-NEXT: qvfmadd 0, 2, 3, 0
+; CHECK-NEXT: qvfnmsub 0, 3, 0, 3
; CHECK-NEXT: qvfmul 3, 1, 0
; CHECK-NEXT: qvfnmsub 1, 2, 3, 1
; CHECK-NEXT: qvfmadd 1, 0, 1, 3
diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll
index bc33617..a0afb4b 100644
--- a/llvm/test/CodeGen/PowerPC/recipest.ll
+++ b/llvm/test/CodeGen/PowerPC/recipest.ll
@@ -194,8 +194,8 @@ define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind {
define double @foo2_fmf(double %a, double %b) nounwind {
; CHECK: @foo2_fmf
; CHECK-DAG: fre
-; CHECK-DAG: fnmsub
-; CHECK: fmadd
+; CHECK-DAG: fmadd
+; CHECK: fnmsub
; CHECK-NEXT: fmul
; CHECK-NEXT: fnmsub
; CHECK-NEXT: fmadd
diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 9ab320c..2cdf832 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -13,9 +13,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
; CHECK-NEXT: lvx 4, 0, 3
; CHECK-NEXT: xxspltw 0, 0, 0
; CHECK-NEXT: xvresp 1, 0
-; CHECK-NEXT: xvnmsubasp 35, 0, 1
+; CHECK-NEXT: xvmaddasp 35, 0, 1
; CHECK-NEXT: xvmulsp 0, 34, 36
-; CHECK-NEXT: xvmaddasp 1, 1, 35
+; CHECK-NEXT: xvnmsubasp 1, 1, 35
; CHECK-NEXT: xvmulsp 34, 0, 1
; CHECK-NEXT: blr
%ins = insertelement <4 x float> undef, float %a, i32 0