diff options
author | QingShan Zhang <qshanz@cn.ibm.com> | 2020-01-09 03:41:36 +0000 |
---|---|---|
committer | QingShan Zhang <qshanz@cn.ibm.com> | 2020-01-09 04:33:46 +0000 |
commit | d48ac7d54d8a096677c84cfb2928400e05b918ea (patch) | |
tree | d9d9fde71e0263cb6226b22a28a51bde5057d776 | |
parent | de3d0ee023cb14c06d5be01369ef8db4cbfa16b4 (diff) | |
download | llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.zip llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.tar.gz llvm-d48ac7d54d8a096677c84cfb2928400e05b918ea.tar.bz2 |
[DAGCombine] Fold the (fma -x, y, -z) to -(fma x, y, z)
This combine is profitable as long as FNEG is not free on the target,
because it reduces the number of negations from two to one.
Differential Revision: https://reviews.llvm.org/D72312
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/combine-fneg.ll | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/fma-combine.ll | 42 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-recipest.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/recipest.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll | 4 |
6 files changed, 37 insertions, 37 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cfc4671..37b1b17 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12639,6 +12639,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } + // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) + // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) + if (!TLI.isFNegFree(VT) && + TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, + ForCodeSize) == 2) + return DAG.getNode(ISD::FNEG, DL, VT, + TLI.getNegatedExpression(SDValue(N, 0), DAG, + LegalOperations, ForCodeSize), + Flags); return SDValue(); } diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll index 1d85f4f..14bace2 100644 --- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -5,19 +5,16 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) { ; CHECK-LABEL: fneg_fdiv_splat: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: # kill: def $f1 killed $f1 def $vsl1 ; CHECK-NEXT: xxspltd 0, 1, 0 -; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l ; CHECK-NEXT: lxvd2x 1, 0, 3 -; CHECK-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-NEXT: xvredp 2, 0 -; CHECK-NEXT: addi 3, 3, .LCPI0_1@toc@l -; CHECK-NEXT: xxswapd 1, 1 -; CHECK-NEXT: xvnmsubadp 1, 0, 2 -; CHECK-NEXT: xvmaddadp 2, 2, 1 -; CHECK-NEXT: lxvd2x 1, 0, 3 ; CHECK-NEXT: xxswapd 1, 1 +; CHECK-NEXT: xxlor 3, 1, 1 +; CHECK-NEXT: xvmaddadp 3, 0, 2 +; CHECK-NEXT: xvnmsubadp 2, 2, 3 ; CHECK-NEXT: xvmaddadp 1, 0, 2 ; CHECK-NEXT: xvmsubadp 2, 2, 1 ; CHECK-NEXT: xvmuldp 34, 34, 2 diff --git a/llvm/test/CodeGen/PowerPC/fma-combine.ll b/llvm/test/CodeGen/PowerPC/fma-combine.ll index 5baf663..88da295 100644 --- a/llvm/test/CodeGen/PowerPC/fma-combine.ll +++ b/llvm/test/CodeGen/PowerPC/fma-combine.ll @@ -8,14 +8,12 @@ 
define double @fma_combine1(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine1: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine1: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: fmsub 1, 0, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine1: @@ -34,14 +32,12 @@ entry: define double @fma_combine2(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine2: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 -; CHECK-FAST-NEXT: xsmsubadp 1, 2, 0 +; CHECK-FAST-NEXT: xsnmaddadp 1, 2, 3 ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine2: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: fmsub 1, 2, 0, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 1, 2, 3, 1 ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine2: @@ -62,25 +58,25 @@ entry: define double @fma_combine_two_uses(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine_two_uses: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NEXT: addis 4, 2, z@toc@ha -; CHECK-FAST-NEXT: xsnegdp 3, 1 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 -; CHECK-FAST-NEXT: stfd 0, z@toc@l(4) -; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 +; CHECK-FAST-NEXT: xsnegdp 2, 3 +; CHECK-FAST-NEXT: stfd 0, v@toc@l(3) +; CHECK-FAST-NEXT: stfd 2, z@toc@l(4) ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine_two_uses: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 +; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 ; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NOVSX-NEXT: addis 
4, 2, z@toc@ha -; CHECK-FAST-NOVSX-NEXT: fmsub 0, 3, 2, 1 -; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 -; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) +; CHECK-FAST-NOVSX-NEXT: fneg 3, 3 ; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 ; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) +; CHECK-FAST-NOVSX-NEXT: stfd 3, z@toc@l(4) ; CHECK-FAST-NOVSX-NEXT: blr ; ; CHECK-LABEL: fma_combine_two_uses: @@ -108,19 +104,17 @@ entry: define double @fma_combine_one_use(double %a, double %b, double %c) { ; CHECK-FAST-LABEL: fma_combine_one_use: ; CHECK-FAST: # %bb.0: # %entry -; CHECK-FAST-NEXT: xsnegdp 0, 3 +; CHECK-FAST-NEXT: xsnegdp 0, 1 ; CHECK-FAST-NEXT: addis 3, 2, v@toc@ha -; CHECK-FAST-NEXT: xsnegdp 3, 1 -; CHECK-FAST-NEXT: xsmsubadp 1, 0, 2 -; CHECK-FAST-NEXT: stfd 3, v@toc@l(3) +; CHECK-FAST-NEXT: xsnmaddadp 1, 3, 2 +; CHECK-FAST-NEXT: stfd 0, v@toc@l(3) ; CHECK-FAST-NEXT: blr ; ; CHECK-FAST-NOVSX-LABEL: fma_combine_one_use: ; CHECK-FAST-NOVSX: # %bb.0: # %entry -; CHECK-FAST-NOVSX-NEXT: fneg 0, 3 -; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha -; CHECK-FAST-NOVSX-NEXT: fmsub 0, 0, 2, 1 +; CHECK-FAST-NOVSX-NEXT: fnmadd 0, 3, 2, 1 ; CHECK-FAST-NOVSX-NEXT: fneg 2, 1 +; CHECK-FAST-NOVSX-NEXT: addis 3, 2, v@toc@ha ; CHECK-FAST-NOVSX-NEXT: fmr 1, 0 ; CHECK-FAST-NOVSX-NEXT: stfd 2, v@toc@l(3) ; CHECK-FAST-NOVSX-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll index 3bfd92a..246bec1 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -229,8 +229,8 @@ define <4 x double> @foo2_fmf(<4 x double> %a, <4 x double> %b) nounwind { ; CHECK-NEXT: qvfre 3, 2 ; CHECK-NEXT: addi 3, 3, .LCPI8_0@toc@l ; CHECK-NEXT: qvlfdx 0, 0, 3 -; CHECK-NEXT: qvfnmsub 0, 2, 3, 0 -; CHECK-NEXT: qvfmadd 0, 3, 0, 3 +; CHECK-NEXT: qvfmadd 0, 2, 3, 0 +; CHECK-NEXT: qvfnmsub 0, 3, 0, 3 ; CHECK-NEXT: qvfmul 3, 1, 0 ; CHECK-NEXT: qvfnmsub 1, 2, 3, 1 ; CHECK-NEXT: qvfmadd 1, 0, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll 
b/llvm/test/CodeGen/PowerPC/recipest.ll index bc33617..a0afb4b 100644 --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -194,8 +194,8 @@ define <4 x float> @hoo_safe(<4 x float> %a, <4 x float> %b) nounwind { define double @foo2_fmf(double %a, double %b) nounwind { ; CHECK: @foo2_fmf ; CHECK-DAG: fre -; CHECK-DAG: fnmsub -; CHECK: fmadd +; CHECK-DAG: fmadd +; CHECK: fnmsub ; CHECK-NEXT: fmul ; CHECK-NEXT: fnmsub ; CHECK-NEXT: fmadd diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll index 9ab320c..2cdf832 100644 --- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll +++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll @@ -13,9 +13,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) { ; CHECK-NEXT: lvx 4, 0, 3 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: xvresp 1, 0 -; CHECK-NEXT: xvnmsubasp 35, 0, 1 +; CHECK-NEXT: xvmaddasp 35, 0, 1 ; CHECK-NEXT: xvmulsp 0, 34, 36 -; CHECK-NEXT: xvmaddasp 1, 1, 35 +; CHECK-NEXT: xvnmsubasp 1, 1, 35 ; CHECK-NEXT: xvmulsp 34, 0, 1 ; CHECK-NEXT: blr %ins = insertelement <4 x float> undef, float %a, i32 0 |