aboutsummaryrefslogtreecommitdiff
path: root/polly
diff options
context:
space:
mode:
authorKarthika Devi C <quic_kartc@quicinc.com>2024-01-31 09:50:15 +0530
committerGitHub <noreply@github.com>2024-01-30 20:20:15 -0800
commitfa3307eb3f47b0bd574fc754934f98c0f27e4e36 (patch)
tree69382429237a2653d3f1bc0239b646bd3d5df031 /polly
parentab70ac605e784c630122b27c5971fde68e80bd1b (diff)
downloadllvm-fa3307eb3f47b0bd574fc754934f98c0f27e4e36.zip
llvm-fa3307eb3f47b0bd574fc754934f98c0f27e4e36.tar.gz
llvm-fa3307eb3f47b0bd574fc754934f98c0f27e4e36.tar.bz2
[polly] Make reduction detection checks more robust - part 1 (#75297)
Existing reduction detection algorithm does two types of memory checks before marking a load store pair as reduction. First is to check if load and store are pointing to the same memory. This check right now detects the following case as reduction. sum[0] = sum[1] + A[i] This is because the check compares only base of the memory addresses involved and not their indices. This patch addresses this issue and introduces some debug prints. Added couple of test cases to verify the functionality of patch as well.
Diffstat (limited to 'polly')
-rw-r--r--polly/lib/Analysis/ScopBuilder.cpp27
-rw-r--r--polly/test/ScopInfo/reduction_different_index.ll38
-rw-r--r--polly/test/ScopInfo/reduction_different_index1.ll45
3 files changed, 107 insertions, 3 deletions
diff --git a/polly/lib/Analysis/ScopBuilder.cpp b/polly/lib/Analysis/ScopBuilder.cpp
index c62cb2a..4486164 100644
--- a/polly/lib/Analysis/ScopBuilder.cpp
+++ b/polly/lib/Analysis/ScopBuilder.cpp
@@ -2536,18 +2536,39 @@ bool hasIntersectingAccesses(isl::set AllAccs, MemoryAccess *LoadMA,
bool checkCandidatePairAccesses(MemoryAccess *LoadMA, MemoryAccess *StoreMA,
isl::set Domain,
SmallVector<MemoryAccess *, 8> &MemAccs) {
+ // First check if the base value is the same.
isl::map LoadAccs = LoadMA->getAccessRelation();
isl::map StoreAccs = StoreMA->getAccessRelation();
-
- // Skip those with obviously unequal base addresses.
bool Valid = LoadAccs.has_equal_space(StoreAccs);
+ LLVM_DEBUG(dbgs() << " == The accessed space below is "
+ << (Valid ? "" : "not ") << "equal!\n");
+ LLVM_DEBUG(LoadMA->dump(); StoreMA->dump());
+
+ if (Valid) {
+ // Then check if they actually access the same memory.
+ isl::map R = isl::manage(LoadAccs.copy())
+ .intersect_domain(isl::manage(Domain.copy()));
+ isl::map W = isl::manage(StoreAccs.copy())
+ .intersect_domain(isl::manage(Domain.copy()));
+ isl::set RS = R.range();
+ isl::set WS = W.range();
+
+ isl::set InterAccs =
+ isl::manage(RS.copy()).intersect(isl::manage(WS.copy()));
+ Valid = !InterAccs.is_empty();
+ LLVM_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "" : "not ")
+ << "overlapping!\n");
+ }
- // And check if the remaining for overlap with other memory accesses.
if (Valid) {
+ // Finally, check if they are no other instructions accessing this memory
isl::map AllAccsRel = LoadAccs.unite(StoreAccs);
AllAccsRel = AllAccsRel.intersect_domain(Domain);
isl::set AllAccs = AllAccsRel.range();
Valid = !hasIntersectingAccesses(AllAccs, LoadMA, StoreMA, Domain, MemAccs);
+
+ LLVM_DEBUG(dbgs() << " == The accessed memory is " << (Valid ? "not " : "")
+ << "accessed by other instructions!\n");
}
return Valid;
}
diff --git a/polly/test/ScopInfo/reduction_different_index.ll b/polly/test/ScopInfo/reduction_different_index.ll
new file mode 100644
index 0000000..575e5a1
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_different_index.ll
@@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s
+; Verify if the following case is not detected as reduction.
+;
+; void f(int *A,int *sum) {
+; for (int i = 0; i < 1024; i++)
+; sum[0] = sum[1] + A[i];
+; }
+;
+; Verify that we don't detect the reduction on sum
+;
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[1] };
+; CHECK-NEXT:ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[0] };
+;
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define dso_local void @f(ptr nocapture noundef readonly %A, ptr nocapture noundef %sum) local_unnamed_addr #0 {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry.split, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, ptr %sum, i64 1
+ %0 = load i32, ptr %arrayidx
+ %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx1
+ %add = add nsw i32 %1, %0
+ store i32 %add, ptr %sum
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
diff --git a/polly/test/ScopInfo/reduction_different_index1.ll b/polly/test/ScopInfo/reduction_different_index1.ll
new file mode 100644
index 0000000..39bd3c4
--- /dev/null
+++ b/polly/test/ScopInfo/reduction_different_index1.ll
@@ -0,0 +1,45 @@
+; RUN: opt %loadPolly -polly-print-scops -disable-output < %s | FileCheck %s
+; Verify if the following case is not detected as reduction.
+;
+; void f(int *A, int *sum, int i1, int i2) {
+; for (int i = 0; i < 1024; i++)
+; sum[i2] = sum[i1] + A[i];
+; }
+;
+; Verify that we don't detect the reduction on sum
+;
+; CHECK: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[i1] };
+; CHECK-NEXT:ReadAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK-NEXT:MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
+; CHECK-NEXT: { Stmt_for_body[i0] -> MemRef_sum[i2] };
+;
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite) uwtable
+define dso_local void @f(ptr nocapture noundef readonly %A, ptr nocapture noundef %sum, i32 noundef %i1, i32 noundef %i2) local_unnamed_addr #0 {
+entry:
+ br label %entry.split
+
+entry.split: ; preds = %entry
+ %idxprom = sext i32 %i1 to i64
+ %arrayidx = getelementptr inbounds i32, ptr %sum, i64 %idxprom
+ %idxprom3 = sext i32 %i2 to i64
+ %arrayidx4 = getelementptr inbounds i32, ptr %sum, i64 %idxprom3
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %entry.split, %for.body
+ %indvars.iv = phi i64 [ 0, %entry.split ], [ %indvars.iv.next, %for.body ]
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ store i32 %add, ptr %arrayidx4, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}