diff options
| author | Alexey Bataev <a.bataev@outlook.com> | 2025-10-31 15:07:26 -0700 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@outlook.com> | 2025-10-31 15:25:13 -0700 |
| commit | 964c7711f4384b08f051d17da888d35d03a3024a (patch) | |
| tree | a93165ba383dbb0e4f17b91814b08c034819c2d4 | |
| parent | d310693bde68b49cdb5c5877aadffb41d35c45fb (diff) | |
| download | llvm-964c7711f4384b08f051d17da888d35d03a3024a.zip llvm-964c7711f4384b08f051d17da888d35d03a3024a.tar.gz llvm-964c7711f4384b08f051d17da888d35d03a3024a.tar.bz2 | |
[SLP]Fix the minbitwidth analysis for alternate opcodes
If the alternate operation is stricter than the main operation, we
cannot rely on the analysis of the main operation. In such a case, it is
better to avoid doing the analysis at all, since it may affect the overall
result and lead to incorrect optimization.
Fixes #165878
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 21 | ||||
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll | 14 |
2 files changed, 27 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1b55a3b..34b405c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote( {VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(), VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()}); + if (E.isAltShuffle()) { + // Combining these opcodes may lead to incorrect analysis, skip for now. + auto IsDangerousOpcode = [](unsigned Opcode) { + switch (Opcode) { + case Instruction::Shl: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return true; + default: + break; + } + return false; + }; + if (IsDangerousOpcode(E.getAltOpcode())) + return FinalAnalysis(); + } + switch (E.getOpcode()) { // We can always demote truncations and extensions. 
Since truncations can diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll index cc2e16e..959b235 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll @@ -6,14 +6,12 @@ define float @test(i8 %0) { ; CHECK-SAME: i8 [[TMP0:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], <i16 2, i16 27> -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], <i16 2, i16 27> -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> <i32 0, i32 3> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27> +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] ; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [ ; CHECK-NEXT: i32 0, label %[[EXIT]] |
