diff options
author | Alexey Bataev <a.bataev@outlook.com> | 2024-03-27 14:32:13 -0700 |
---|---|---|
committer | Alexey Bataev <a.bataev@outlook.com> | 2024-03-27 14:34:59 -0700 |
commit | d94dc5f0d63be3d786224f57c061ef16687fca9a (patch) | |
tree | ce96c0ebef93a145dc437a119abbf861f2946f4e | |
parent | 742a82a729925dc79641beb649f492003be40725 (diff) | |
download | llvm-d94dc5f0d63be3d786224f57c061ef16687fca9a.zip llvm-d94dc5f0d63be3d786224f57c061ef16687fca9a.tar.gz llvm-d94dc5f0d63be3d786224f57c061ef16687fca9a.tar.bz2 |
[SLP]Fix PR86763: do not truncate reductions to the demanded bits size.
Need to adjust ReductionBitWidth after the minbitwidth analysis, if the
demanded bits analysis shows that its size is less than the size of the
vectorized value. This prevents an incorrect sign/zero extension
transformation afterwards.
3 files changed, 11 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index e1f26b9..7f52884 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14415,6 +14415,13 @@ void BoUpSLP::computeMinimumValueSizes() { unsigned MaxBitWidth = ComputeMaxBitWidth( TreeRoot, VectorizableTree[NodeIdx]->getVectorFactor(), IsTopRoot, IsProfitableToDemoteRoot, Opcode, Limit, IsTruncRoot); + if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { + if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) + ReductionBitWidth = bit_ceil(MaxBitWidth); + else if (MaxBitWidth == 0) + ReductionBitWidth = 0; + } + for (unsigned Idx : RootDemotes) ToDemote.append(VectorizableTree[Idx]->Scalars.begin(), VectorizableTree[Idx]->Scalars.end()); diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll index 611003a..7771e83 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reduction-extension-after-bitwidth.ll @@ -6,8 +6,8 @@ define i32 @test(ptr %0, ptr %1) { ; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOAD_5:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> <i1 true, i1 true, i1 true, i1 true>) -; CHECK-NEXT: [[TMP3:%.*]] = sext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> <i8 1, i8 1, i8 1, i8 1>) +; CHECK-NEXT: [[TMP3:%.*]] = sext i8 [[TMP2]] to i32 ; CHECK-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[LOAD_5]] ; CHECK-NEXT: ret i32 [[OP_RDX]] ; diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll 
index cfe3ca9..7b4e2b0 100644 --- a/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/minbitwidth-root-trunc.ll @@ -11,9 +11,8 @@ define void @test(ptr %a, i8 %0, i16 %b.promoted.i) { ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i128> [[TMP5]], <4 x i128> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i128> [[TMP6]] to <4 x i16> ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP4]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i16> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = zext i1 [[TMP10]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> [[TMP8]]) +; CHECK-NEXT: [[TMP11:%.*]] = zext i16 [[TMP9]] to i64 ; CHECK-NEXT: [[OP_RDX:%.*]] = and i64 [[TMP11]], 1 ; CHECK-NEXT: store i64 [[OP_RDX]], ptr [[A]], align 8 ; CHECK-NEXT: ret void |