From 8a0bfe490592de3df28d82c5dd69956e43c20f1d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 4 Apr 2024 12:02:06 -0700 Subject: [SLP]Fix PR87630: wrong result for externally used vector value. The externally used value must be checked to be representable in BitWidth bits before the narrower type is applied; otherwise, the wider type must be kept. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++++++ .../X86/external-user-instruction-minbitwidth.ll | 12 ++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) (limited to 'llvm') diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 79d146a..bdd26ac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14141,6 +14141,16 @@ bool BoUpSLP::collectValuesToDemote( })) return FinalAnalysis(); + if (!all_of(I->users(), + [=](User *U) { + return getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || + (U->getType()->isSized() && + DL->getTypeSizeInBits(U->getType()) <= BitWidth); + }) && + !IsPotentiallyTruncated(I, BitWidth)) + return false; + unsigned Start = 0; unsigned End = I->getNumOperands(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll index 9d3d602..84f7e21 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll @@ -14,17 +14,13 @@ define i8 @test() { ; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i32> [[TMP3]] to <8 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP4]], +; 
CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP8]]) ; CHECK-NEXT: [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP5]], i32 7 -; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7 ; CHECK-NEXT: [[XOR_31:%.*]] = and i32 [[TMP13]], -2 ; CHECK-NEXT: store i32 [[XOR_31]], ptr @d, align 4 ; CHECK-NEXT: ret i8 [[CONV4_30]] -- cgit v1.1