diff options
| author | David Sherwood <david.sherwood@arm.com> | 2026-02-09 13:42:26 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-02-09 13:42:26 +0000 |
| commit | 44031ae79f7d3a4a2f56efecb75d4467955c043c (patch) | |
| tree | d2778dae5ac4ea9b0e56c97d5b51b34c181fd3da /llvm/test/Transforms/LoopVectorize | |
| parent | 0bbf25ffd4b2e665e2a36adcd11b732ac8bb74e6 (diff) | |
| download | llvm-44031ae79f7d3a4a2f56efecb75d4467955c043c.tar.gz llvm-44031ae79f7d3a4a2f56efecb75d4467955c043c.tar.bz2 llvm-44031ae79f7d3a4a2f56efecb75d4467955c043c.zip | |
[LV] Fix issue in VPFirstOrderRecurrencePHIRecipe::usesFirstLaneOnly (#179977)
In some cases we decide to vectorise loops with first-order recurrences
using VF=1, IC>1. We then attempt to unroll a VPlan in replicateByVF;
however, when trying to erase the list of values from the parent we
trigger the following assert:
```
virtual llvm::VPRecipeValue::~VPRecipeValue(): Assertion `Users.empty()
&& "trying to delete a VPRecipeValue with remaining users"' failed.
```
The problem seems to stem from this code:
```
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) {
return U.usesFirstLaneOnly(DefR);
});
```
Here usesFirstLaneOnly returns false, so we fail to replace uses of
DefR with LaneDefs[0]. Upon inspection, the only VPUser objects that
return false are VPInstruction::FirstOrderRecurrenceSplice and
VPFirstOrderRecurrencePHIRecipe. Since the values are all scalar, it's
simply not possible for us to be using anything other than the first
lane. I've fixed this by bailing out of replicateByVF early for plans with
only a scalar VF.
Fixes https://github.com/llvm/llvm-project/issues/179671
Diffstat (limited to 'llvm/test/Transforms/LoopVectorize')
| -rw-r--r-- | llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll | 52 |
1 files changed, 52 insertions, 0 deletions
diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index c77afa870e2c..82470d3db0a2 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -380,3 +380,55 @@ exit: ret void } +define void @pr179671(ptr align 8 dereferenceable(120) %p, ptr %a, i32 %b) { +; CHECK-LABEL: define void @pr179671( +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi ptr [ %a, %vector.ph ], [ [[NEXT_GEP3:%.*]], %vector.body ] +; CHECK-NEXT: [[DOTCAST1:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[DOTCAST1]], 3 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 %b, [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = mul i64 [[INDEX]], 128 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX2]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX2]], 128 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP15]] +; CHECK-NEXT: [[NEXT_GEP3]] = getelementptr i8, ptr null, i64 [[TMP12]] +; CHECK-NEXT: store ptr [[VECTOR_RECUR]], ptr [[NEXT_GEP]], align 8 +; CHECK-NEXT: store ptr [[NEXT_GEP]], ptr [[NEXT_GEP3]], align 8 +; CHECK-NEXT: store ptr [[NEXT_GEP3]], ptr [[INV_PTR:%.*]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[TMP11]], 3 +; CHECK-NEXT: store i32 [[TMP13]], ptr [[INV_PTR2:%.*]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC:%.*]] +; CHECK-NEXT: br i1 [[TMP14]], label %[[LOOP_1:.*]], label %vector.body +entry: + %inv_ptr = getelementptr inbounds nuw i8, ptr %p, i64 24 + %inv_ptr2 = getelementptr inbounds nuw i8, ptr %p, i64 40 + br label %loop.header + +loop.header: + %load23 = phi i32 [ %b, %entry ], 
[ %sadd_val, %loop.5 ] + %load12 = phi ptr [ %a, %entry ], [ %phi_ptr1, %loop.5 ] + %phi_ptr1 = phi ptr [ null, %entry ], [ %phi_ptr_next, %loop.5 ] + %phi_ptr_next = getelementptr i8, ptr %phi_ptr1, i64 128 + store ptr %load12, ptr %phi_ptr1, align 8 + br label %loop.3 + +loop.3: + store ptr %phi_ptr1, ptr %inv_ptr, align 8 + %sadd_val = add i32 %load23, 3 + %sadd_ov = icmp eq i32 %sadd_val, 8 + br i1 %sadd_ov, label %exit.0, label %loop.5 + +loop.5: + store i32 %sadd_val, ptr %inv_ptr2, align 8 + br label %loop.header + +exit.0: + store i32 0, ptr %p, align 4 + ret void + +exit: + ret void +} |
