aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2020-11-19 12:12:16 +0000
committer Tom Stellard <tstellar@redhat.com> 2020-12-09 11:29:51 -0500
commit 14d60e9a80d40f9efc4b76524a07320d38994d2b (patch)
tree 4a32b5a37b4a24e1edd2c2411a284ced5eb9848c /llvm
parent a21e609d6a255f893fa7cbd863a3bc5c017c478e (diff)
download llvm-14d60e9a80d40f9efc4b76524a07320d38994d2b.zip
llvm-14d60e9a80d40f9efc4b76524a07320d38994d2b.tar.gz
llvm-14d60e9a80d40f9efc4b76524a07320d38994d2b.tar.bz2
[X86][AVX] Only share broadcasts of different widths from the same SDValue of the same SDNode (PR48215)
D57663 allowed us to reuse broadcasts of the same scalar value by extracting low subvectors from the widest type. Unfortunately, we weren't ensuring the broadcasts were from the same SDValue, just the same SDNode - which failed on multiple-value nodes like ISD::SDIVREM. FYI: I intend to request this be merged into the 11.x release branch. Differential Revision: https://reviews.llvm.org/D91709 (cherry picked from commit 14ae02fb3397961bb5f99a0df60622375fc1976d) Signed-off-by: Warren Ristow <warren.ristow@sony.com>
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 2
-rw-r--r-- llvm/test/CodeGen/X86/pr48215.ll 15
2 files changed, 11 insertions, 6 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index afe470cc..f5b704e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36018,8 +36018,10 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0));
// Share broadcast with the longest vector and extract low subvector (free).
+ // Ensure the same SDValue from the SDNode use is being used.
for (SDNode *User : Src->uses())
if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST &&
+ Src == User->getOperand(0) &&
User->getValueSizeInBits(0) > VT.getSizeInBits()) {
return extractSubVector(SDValue(User, 0), 0, DAG, DL,
VT.getSizeInBits());
diff --git a/llvm/test/CodeGen/X86/pr48215.ll b/llvm/test/CodeGen/X86/pr48215.ll
index c825955..125bde7 100644
--- a/llvm/test/CodeGen/X86/pr48215.ll
+++ b/llvm/test/CodeGen/X86/pr48215.ll
@@ -33,12 +33,14 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
; AVX2-NEXT: idivl %esi
; AVX2-NEXT: vmovd %eax, %xmm0
; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0
-; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,2,3,4,5,6,7]
+; AVX2-NEXT: vmovd %edx, %xmm1
+; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,3,4,5,6,7]
+; AVX2-NEXT: vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,7]
-; AVX2-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm1
-; AVX2-NEXT: vmovmskps %ymm1, %ecx
-; AVX2-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vmovmskps %xmm0, %eax
+; AVX2-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
+; AVX2-NEXT: vmovmskps %ymm0, %ecx
+; AVX2-NEXT: vmovmskps %xmm1, %eax
; AVX2-NEXT: addl %ecx, %eax
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@@ -49,8 +51,9 @@ define i32 @PR48215(i32 %a0, i32 %a1) {
; AVX512-NEXT: cltd
; AVX512-NEXT: idivl %esi
; AVX512-NEXT: vpbroadcastd %eax, %ymm0
+; AVX512-NEXT: vpbroadcastd %edx, %xmm1
; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %ymm0, %k0
-; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm0, %k1
+; AVX512-NEXT: vpcmpltd {{.*}}(%rip), %xmm1, %k1
; AVX512-NEXT: kmovw %k0, %eax
; AVX512-NEXT: movzbl %al, %ecx
; AVX512-NEXT: kmovw %k1, %eax