diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-04-03 22:15:29 +0000 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-04-03 22:15:29 +0000 |
commit | 61b5e3c6a991e7ddaf5867bd3043e8c45f801054 (patch) | |
tree | 425dd5d12ac5e3b497936692d24d63a2b3bfaa26 | |
parent | e51c12430ffe26c5889f41507e11409bd4f58de2 (diff) | |
download | llvm-61b5e3c6a991e7ddaf5867bd3043e8c45f801054.zip llvm-61b5e3c6a991e7ddaf5867bd3043e8c45f801054.tar.gz llvm-61b5e3c6a991e7ddaf5867bd3043e8c45f801054.tar.bz2 |
[x86] eliminate movddup of horizontal op
This pattern would show up as a regression if we more
aggressively convert vector FP ops to scalar ops.
There's still a missed optimization for the case where v4f64 is a
legal type (AVX), because we create that h-op with an undef operand.
We should probably just duplicate the operands for that
pattern to avoid trouble.
llvm-svn: 357642
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/haddsub-shuf.ll | 4 |
2 files changed, 11 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index f22e893..8400997 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32866,8 +32866,10 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, /// Eliminate a redundant shuffle of a horizontal math op. static SDValue foldShuffleOfHorizOp(SDNode *N) { - if (N->getOpcode() != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef()) - return SDValue(); + unsigned Opcode = N->getOpcode(); + if (Opcode != X86ISD::MOVDDUP) + if (Opcode != ISD::VECTOR_SHUFFLE || !N->getOperand(1).isUndef()) + return SDValue(); SDValue HOp = N->getOperand(0); if (HOp.getOpcode() != X86ISD::HADD && HOp.getOpcode() != X86ISD::FHADD && @@ -32885,6 +32887,13 @@ static SDValue foldShuffleOfHorizOp(SDNode *N) { // When the operands of a horizontal math op are identical, the low half of // the result is the same as the high half. If the shuffle is also replicating // low and high halves, we don't need the shuffle. 
+ if (Opcode == X86ISD::MOVDDUP) { + // movddup (hadd X, X) --> hadd X, X + assert((HOp.getValueType() == MVT::v2f64 || + HOp.getValueType() == MVT::v4f64) && "Unexpected type for h-op"); + return HOp; + } + // shuffle (hadd X, X), undef, [low half...high half] --> hadd X, X ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); // TODO: Other mask possibilities like {1,1} and {1,0} could be added here, diff --git a/llvm/test/CodeGen/X86/haddsub-shuf.ll b/llvm/test/CodeGen/X86/haddsub-shuf.ll index 92bb55a..e50d6af 100644 --- a/llvm/test/CodeGen/X86/haddsub-shuf.ll +++ b/llvm/test/CodeGen/X86/haddsub-shuf.ll @@ -325,7 +325,6 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) { ; SSSE3_FAST-LABEL: hadd_v2f64_scalar_splat: ; SSSE3_FAST: # %bb.0: ; SSSE3_FAST-NEXT: haddpd %xmm0, %xmm0 -; SSSE3_FAST-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] ; SSSE3_FAST-NEXT: retq ; ; AVX1_SLOW-LABEL: hadd_v2f64_scalar_splat: @@ -338,7 +337,6 @@ define <2 x double> @hadd_v2f64_scalar_splat(<2 x double> %a) { ; AVX1_FAST-LABEL: hadd_v2f64_scalar_splat: ; AVX1_FAST: # %bb.0: ; AVX1_FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 -; AVX1_FAST-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] ; AVX1_FAST-NEXT: retq ; ; AVX2_SLOW-LABEL: hadd_v2f64_scalar_splat: @@ -378,8 +376,6 @@ define <4 x double> @hadd_v4f64_scalar_splat(<4 x double> %a) { ; SSSE3_FAST: # %bb.0: ; SSSE3_FAST-NEXT: haddpd %xmm0, %xmm0 ; SSSE3_FAST-NEXT: haddpd %xmm1, %xmm1 -; SSSE3_FAST-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; SSSE3_FAST-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0] ; SSSE3_FAST-NEXT: retq ; ; AVX-LABEL: hadd_v4f64_scalar_splat: |