aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp7
-rw-r--r--llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll76
2 files changed, 79 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c4582df..894dc68 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5355,11 +5355,10 @@ static bool clusterSortPtrAccesses(ArrayRef<Value *> VL,
SmallPtrSet<Value *, 13> SecondPointers;
Value *P1 = Ptr1;
Value *P2 = Ptr2;
- if (P1 == P2)
- return false;
unsigned Depth = 0;
- while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1) &&
- Depth <= RecursionMaxDepth) {
+ while (!FirstPointers.contains(P2) && !SecondPointers.contains(P1)) {
+ if (P1 == P2 || Depth > RecursionMaxDepth)
+ return false;
FirstPointers.insert(P1);
SecondPointers.insert(P2);
P1 = getUnderlyingObject(P1, /*MaxLookup=*/1);
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll
new file mode 100644
index 0000000..cf1ed54
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/long-gep-chains.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=slp-vectorizer -mtriple=riscv64-unknown-linux -mattr=+v < %s | FileCheck %s
+
+define i64 @test(ptr %arg, i32 %arg1, i64 %i) {
+; CHECK-LABEL: define i64 @test(
+; CHECK-SAME: ptr [[ARG:%.*]], i32 [[ARG1:%.*]], i64 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[I2:%.*]] = getelementptr i8, ptr [[ARG]], i64 [[I]]
+; CHECK-NEXT: [[I3:%.*]] = getelementptr i8, ptr [[I2]], i64 [[I]]
+; CHECK-NEXT: [[I4:%.*]] = getelementptr i8, ptr [[I3]], i64 [[I]]
+; CHECK-NEXT: [[I5:%.*]] = getelementptr i8, ptr [[I4]], i64 [[I]]
+; CHECK-NEXT: [[I6:%.*]] = getelementptr i8, ptr [[I5]], i64 [[I]]
+; CHECK-NEXT: [[I7:%.*]] = getelementptr i8, ptr [[I6]], i64 [[I]]
+; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr [[I7]], i64 [[I]]
+; CHECK-NEXT: [[I9:%.*]] = getelementptr i8, ptr [[I8]], i64 [[I]]
+; CHECK-NEXT: [[I10:%.*]] = getelementptr i8, ptr [[I9]], i64 [[I]]
+; CHECK-NEXT: [[I11:%.*]] = getelementptr i8, ptr [[I10]], i64 [[I]]
+; CHECK-NEXT: [[I12:%.*]] = getelementptr i8, ptr [[I11]], i64 [[I]]
+; CHECK-NEXT: [[I13:%.*]] = getelementptr i8, ptr [[I12]], i64 [[I]]
+; CHECK-NEXT: [[I14:%.*]] = getelementptr i8, ptr [[I13]], i64 [[I]]
+; CHECK-NEXT: [[I140:%.*]] = load i8, ptr [[I14]], align 1
+; CHECK-NEXT: [[I1412:%.*]] = zext i8 [[I140]] to i32
+; CHECK-NEXT: [[I142:%.*]] = mul i32 [[ARG1]], [[I1412]]
+; CHECK-NEXT: [[I143:%.*]] = getelementptr i8, ptr [[I13]], i64 15
+; CHECK-NEXT: [[I144:%.*]] = load i8, ptr [[I143]], align 1
+; CHECK-NEXT: [[I1453:%.*]] = zext i8 [[I144]] to i32
+; CHECK-NEXT: [[I146:%.*]] = mul i32 [[ARG1]], [[I1453]]
+; CHECK-NEXT: [[I147:%.*]] = getelementptr i8, ptr [[I13]], i64 14
+; CHECK-NEXT: [[I148:%.*]] = load i8, ptr [[I147]], align 1
+; CHECK-NEXT: [[I1494:%.*]] = zext i8 [[I148]] to i32
+; CHECK-NEXT: [[I150:%.*]] = mul i32 [[ARG1]], [[I1494]]
+; CHECK-NEXT: [[I151:%.*]] = getelementptr i8, ptr [[I13]], i64 13
+; CHECK-NEXT: [[I152:%.*]] = load i8, ptr [[I151]], align 1
+; CHECK-NEXT: [[I1535:%.*]] = zext i8 [[I152]] to i32
+; CHECK-NEXT: [[I154:%.*]] = mul i32 [[ARG1]], [[I1535]]
+; CHECK-NEXT: [[I1311:%.*]] = or i32 [[I142]], [[I146]]
+; CHECK-NEXT: [[I1312:%.*]] = or i32 [[I1311]], [[I150]]
+; CHECK-NEXT: [[I1313:%.*]] = or i32 [[I1312]], [[I154]]
+; CHECK-NEXT: [[I1536:%.*]] = zext i32 [[I1313]] to i64
+; CHECK-NEXT: ret i64 [[I1536]]
+;
+bb:
+ %i2 = getelementptr i8, ptr %arg, i64 %i
+ %i3 = getelementptr i8, ptr %i2, i64 %i
+ %i4 = getelementptr i8, ptr %i3, i64 %i
+ %i5 = getelementptr i8, ptr %i4, i64 %i
+ %i6 = getelementptr i8, ptr %i5, i64 %i
+ %i7 = getelementptr i8, ptr %i6, i64 %i
+ %i8 = getelementptr i8, ptr %i7, i64 %i
+ %i9 = getelementptr i8, ptr %i8, i64 %i
+ %i10 = getelementptr i8, ptr %i9, i64 %i
+ %i11 = getelementptr i8, ptr %i10, i64 %i
+ %i12 = getelementptr i8, ptr %i11, i64 %i
+ %i13 = getelementptr i8, ptr %i12, i64 %i
+ %i14 = getelementptr i8, ptr %i13, i64 %i
+ %i140 = load i8, ptr %i14, align 1
+ %i1412 = zext i8 %i140 to i32
+ %i142 = mul i32 %arg1, %i1412
+ %i143 = getelementptr i8, ptr %i13, i64 15
+ %i144 = load i8, ptr %i143, align 1
+ %i1453 = zext i8 %i144 to i32
+ %i146 = mul i32 %arg1, %i1453
+ %i147 = getelementptr i8, ptr %i13, i64 14
+ %i148 = load i8, ptr %i147, align 1
+ %i1494 = zext i8 %i148 to i32
+ %i150 = mul i32 %arg1, %i1494
+ %i151 = getelementptr i8, ptr %i13, i64 13
+ %i152 = load i8, ptr %i151, align 1
+ %i1535 = zext i8 %i152 to i32
+ %i154 = mul i32 %arg1, %i1535
+ %i1311 = or i32 %i142, %i146
+ %i1312 = or i32 %i1311, %i150
+ %i1313 = or i32 %i1312, %i154
+ %i1536 = zext i32 %i1313 to i64
+ ret i64 %i1536
+}