about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 23
-rw-r--r-- llvm/test/CodeGen/X86/horizontal-shuffle-2.ll | 19
2 files changed, 19 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f3b1e6c..4b13b5b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44076,32 +44076,9 @@ static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
"Unexpected horizontal add/sub opcode");
if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
- // For slow-hop targets, if we have a hop with a single op, see if we already
- // have another user that we can reuse and shuffle the result.
MVT VT = N->getSimpleValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (VT.is128BitVector() && LHS == RHS) {
- for (SDNode *User : LHS->uses()) {
- if (User != N && User->getOpcode() == N->getOpcode()) {
- MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
- if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
- }
- if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
- return DAG.getBitcast(
- VT,
- DAG.getVectorShuffle(ShufVT, SDLoc(N),
- DAG.getBitcast(ShufVT, SDValue(User, 0)),
- DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
- }
- }
- }
- }
// HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y)).
if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
diff --git a/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll b/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
index 78c30e4..9c5d7d4 100644
--- a/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
+++ b/llvm/test/CodeGen/X86/horizontal-shuffle-2.ll
@@ -171,6 +171,25 @@ define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
ret <4 x float> %3
}
+define <8 x i16> @PR51974(<8 x i16> %a0) {
+; SSE-LABEL: PR51974:
+; SSE: ## %bb.0:
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: phaddw %xmm0, %xmm1
+; SSE-NEXT: phaddw %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
+; SSE-NEXT: ret{{[l|q]}}
+;
+; AVX-LABEL: PR51974:
+; AVX: ## %bb.0:
+; AVX-NEXT: vphaddw %xmm0, %xmm0, %xmm1
+; AVX-NEXT: vphaddw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: ret{{[l|q]}}
+ %r0 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a0)
+ %r1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %r0, <8 x i16> %a0)
+ ret <8 x i16> %r1
+}
+
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>)
declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>)