aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp8
-rw-r--r--llvm/test/CodeGen/PowerPC/sms-store-dependence.ll84
2 files changed, 89 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 81b7fdc..8cd7f4e 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -2225,7 +2225,7 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
}
/// Return true for an order or output dependence that is loop carried
-/// potentially. A dependence is loop carried if the destination defines a valu
+/// potentially. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
@@ -2251,10 +2251,12 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;
- // Only chain dependences between a load and store can be loop carried.
- if (!DI->mayStore() || !SI->mayLoad())
+ if (!DI->mayLoadOrStore() || !SI->mayLoadOrStore())
return false;
+ // The conservative assumption is that a dependence between memory operations
+ // may be loop carried. The following code checks when it can be proved that
+ // there is no loop carried dependence.
unsigned DeltaS, DeltaD;
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
diff --git a/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll b/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll
new file mode 100644
index 0000000..d1ec320d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sms-store-dependence.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -verify-machineinstrs\
+; RUN: -mcpu=pwr9 --ppc-enable-pipeliner 2>&1 | FileCheck %s
+
+; Test that the pipeliner schedules the store instructions correctly. Since
+; there is a dependence between the stores, they cannot be scheduled further than
+; MII cycles/instructions apart. That is, the first store cannot occur multiple
+; times before the second store in the schedule.
+define dso_local void @comp_method(ptr noalias nocapture noundef readonly %0, ptr nocapture noundef writeonly %1, ptr nocapture noundef writeonly %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6, i64 %v1) local_unnamed_addr {
+; CHECK-LABEL: comp_method:
+; CHECK: # %bb.0:
+; CHECK-NEXT: extsw 7, 8
+; CHECK-NEXT: extsw 8, 9
+; CHECK-NEXT: clrldi 9, 6, 32
+; CHECK-NEXT: addi 6, 3, -1
+; CHECK-NEXT: mtctr 9
+; CHECK-NEXT: li 11, 0
+; CHECK-NEXT: sradi 12, 11, 2
+; CHECK-NEXT: add 5, 5, 8
+; CHECK-NEXT: li 8, 2
+; CHECK-NEXT: li 3, 8
+; CHECK-NEXT: addi 11, 7, 0
+; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; CHECK-NEXT: lbzu 9, 1(6)
+; CHECK-NEXT: add 12, 12, 10
+; CHECK-NEXT: extsb 9, 9
+; CHECK-NEXT: stbx 8, 4, 9
+; CHECK-NEXT: add 9, 9, 12
+; CHECK-NEXT: bdz .LBB0_2
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1:
+; CHECK-NEXT: lbzu 0, 1(6)
+; CHECK-NEXT: sradi 12, 11, 2
+; CHECK-NEXT: add 11, 11, 7
+; CHECK-NEXT: add 12, 12, 10
+; CHECK-NEXT: sldi 30, 9, 2
+; CHECK-NEXT: add 9, 9, 30
+; CHECK-NEXT: extsb 0, 0
+; CHECK-NEXT: stbx 3, 5, 9
+; CHECK-NEXT: add 9, 0, 12
+; CHECK-NEXT: stbx 8, 4, 0
+; CHECK-NEXT: bdnz .LBB0_1
+; CHECK-NEXT: .LBB0_2:
+; CHECK-NEXT: sldi 4, 9, 2
+; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; CHECK-NEXT: add 4, 9, 4
+; CHECK-NEXT: stbx 3, 5, 4
+; CHECK-NEXT: blr
+ %8 = icmp sgt i32 %3, 64
+ tail call void @llvm.assume(i1 %8)
+ %9 = and i32 %3, 1
+ %10 = icmp eq i32 %9, 0
+ tail call void @llvm.assume(i1 %10)
+ %11 = sext i32 %5 to i64
+ %12 = sext i32 %6 to i64
+ %13 = zext nneg i32 %3 to i64
+ %14 = getelementptr i8, ptr %2, i64 %12
+ br label %16
+
+15:
+ ret void
+
+16:
+ %17 = phi i64 [ 0, %7 ], [ %24, %16 ]
+ %18 = getelementptr inbounds i8, ptr %0, i64 %17
+ %19 = load i8, ptr %18, align 1
+ %20 = sext i8 %19 to i64
+ %21 = getelementptr inbounds i8, ptr %1, i64 %20
+ store i8 2, ptr %21, align 1
+ %22 = mul nsw i64 %17, %11
+ %a1 = ashr i64 %22, 2
+ %a2 = add i64 %a1, %v1
+ %a3 = add i64 %20, %a2
+ %a4 = mul nsw i64 %a3, 5
+ %23 = getelementptr i8, ptr %14, i64 %a4
+ store i8 8, ptr %23, align 1
+ %24 = add nuw nsw i64 %17, 1
+ %25 = icmp eq i64 %24, %13
+ br i1 %25, label %15, label %16
+}
+
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }