aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 21
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll | 14
2 files changed, 27 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1b55a3b..34b405c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote(
{VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
+ if (E.isAltShuffle()) {
+ // The switch below keys on the main opcode only; combining it with these alternate opcodes may lead to incorrect analysis, skip for now.
+ auto IsDangerousOpcode = [](unsigned Opcode) {
+ switch (Opcode) {
+ case Instruction::Shl:
+ case Instruction::AShr:
+ case Instruction::LShr:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ };
+ if (IsDangerousOpcode(E.getAltOpcode()))
+ return FinalAnalysis();
+ }
+
switch (E.getOpcode()) {
// We can always demote truncations and extensions. Since truncations can
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
index cc2e16e..959b235 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll
@@ -6,14 +6,12 @@ define float @test(i8 %0) {
; CHECK-SAME: i8 [[TMP0:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
-; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], <i16 2, i16 27>
-; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], <i16 2, i16 27>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP9]] to i32
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
-; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP10]] to i32
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [
; CHECK-NEXT: i32 0, label %[[EXIT]]