From 8a0bfe490592de3df28d82c5dd69956e43c20f1d Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Thu, 4 Apr 2024 12:02:06 -0700
Subject: [SLP]Fix PR87630: wrong result for externally used vector value.

Need to check that the externally used value can be represented with the
BitWidth before applying it, otherwise need to keep wider type.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp              | 10 ++++++++++
 .../X86/external-user-instruction-minbitwidth.ll             | 12 ++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'llvm')
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 79d146a..bdd26ac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14141,6 +14141,16 @@ bool BoUpSLP::collectValuesToDemote(
       }))
     return FinalAnalysis();
 
+  if (!all_of(I->users(),
+              [=](User *U) {
+                return getTreeEntry(U) ||
+                       (UserIgnoreList && UserIgnoreList->contains(U)) ||
+                       (U->getType()->isSized() &&
+                        DL->getTypeSizeInBits(U->getType()) <= BitWidth);
+              }) &&
+      !IsPotentiallyTruncated(I, BitWidth))
+    return false;
+
   unsigned Start = 0;
   unsigned End = I->getNumOperands();
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll
index 9d3d602..84f7e21 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll
@@ -14,17 +14,13 @@ define i8 @test() {
 ; CHECK-NEXT:    [[CONV1:%.*]] = zext i16 [[TMP1]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = trunc <8 x i32> [[TMP3]] to <8 x i16>
-; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 -32767>
+; CHECK-NEXT:    [[TMP4:%.*]] = or <8 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 32769>
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16>
-; CHECK-NEXT:    [[TMP9:%.*]] = add <8 x i16> [[TMP5]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP9]])
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP10]] to i32
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP8]])
 ; CHECK-NEXT:    [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i16> [[TMP5]], i32 7
-; CHECK-NEXT:    [[TMP13:%.*]] = sext i16 [[TMP12]] to i32
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7
 ; CHECK-NEXT:    [[XOR_31:%.*]] = and i32 [[TMP13]], -2
 ; CHECK-NEXT:    store i32 [[XOR_31]], ptr @d, align 4
 ; CHECK-NEXT:    ret i8 [[CONV4_30]]
-- 
cgit v1.1