aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/LoopInterchange/reduction2mem.ll
blob: 1c19781d19eac0fb6566bd1b9f8eb88854925871 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; NOTE: Support the reduction in the inner loop.
; RUN: opt < %s -passes="loop-interchange"  -loop-interchange-reduction-to-mem -loop-interchange-profitabilities=ignore -S | FileCheck %s

; for (int i = 0; i < n; i++) {
;   r = 0;
;   for (int j = 0; j < n; j++)
;     r = r + a[j][i] * b[j][i];
;   s[i] = r;
; }

define void @func(ptr noalias readonly %a, ptr noalias readonly %b, ptr noalias writeonly %s, i64  %n) {
; CHECK-LABEL: define void @func(
; CHECK-SAME: ptr noalias readonly [[A:%.*]], ptr noalias readonly [[B:%.*]], ptr noalias writeonly [[S:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-NEXT:    br i1 [[CMP]], label %[[INNERLOOP_PREHEADER:.*]], label %[[EXIT:.*]]
; CHECK:       [[OUTERLOOPHEADER_PREHEADER:.*]]:
; CHECK-NEXT:    br label %[[OUTERLOOP_HEADER:.*]]
; CHECK:       [[OUTERLOOP_HEADER]]:
; CHECK-NEXT:    [[INDEX_I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTERLOOP_LATCH:.*]] ], [ 0, %[[OUTERLOOPHEADER_PREHEADER]] ]
; CHECK-NEXT:    [[ADDR_S:%.*]] = getelementptr inbounds nuw double, ptr [[S]], i64 [[INDEX_I]]
; CHECK-NEXT:    [[ADDR_A:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[INDEX_I]]
; CHECK-NEXT:    [[ADDR_B:%.*]] = getelementptr inbounds nuw double, ptr [[B]], i64 [[INDEX_I]]
; CHECK-NEXT:    br label %[[INNERLOOP_SPLIT1:.*]]
; CHECK:       [[INNERLOOP_PREHEADER]]:
; CHECK-NEXT:    br label %[[INNERLOOP:.*]]
; CHECK:       [[INNERLOOP]]:
; CHECK-NEXT:    [[INDEX_J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[INNERLOOP_SPLIT:.*]] ], [ 0, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT:    [[DEAD_REDUCTION:%.*]] = phi double [ [[ADD_LCSSA:%.*]], %[[INNERLOOP_SPLIT]] ], [ 0.000000e+00, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT:    [[FIRSTITER:%.*]] = phi i1 [ false, %[[INNERLOOP_SPLIT]] ], [ true, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT:    br label %[[OUTERLOOPHEADER_PREHEADER]]
; CHECK:       [[INNERLOOP_SPLIT1]]:
; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[ADDR_S]], align 8
; CHECK-NEXT:    [[NEW_VAR:%.*]] = select i1 [[FIRSTITER]], double [[TMP0]], double 0.000000e+00
; CHECK-NEXT:    [[ADDR_A_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_A]], i64 [[INDEX_J]]
; CHECK-NEXT:    [[A_J_I:%.*]] = load double, ptr [[ADDR_A_J_I]], align 8
; CHECK-NEXT:    [[ADDR_B_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_B]], i64 [[INDEX_J]]
; CHECK-NEXT:    [[B_J_I:%.*]] = load double, ptr [[ADDR_B_J_I]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[B_J_I]], [[A_J_I]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[MUL]], [[NEW_VAR]]
; CHECK-NEXT:    store double [[ADD]], ptr [[ADDR_S]], align 8
; CHECK-NEXT:    [[DEAD_J_NEXT:%.*]] = add nuw nsw i64 [[INDEX_J]], 1
; CHECK-NEXT:    [[DEAD_COND:%.*]] = icmp eq i64 [[DEAD_J_NEXT]], [[N]]
; CHECK-NEXT:    br label %[[OUTERLOOP_LATCH]]
; CHECK:       [[INNERLOOP_SPLIT]]:
; CHECK-NEXT:    [[ADD_LCSSA]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ]
; CHECK-NEXT:    [[DEAD_LCSSA:%.*]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ]
; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[INDEX_J]], 1
; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i64 [[J_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CMP1]], label %[[EXIT_LOOPEXIT:.*]], label %[[INNERLOOP]]
; CHECK:       [[OUTERLOOP_LATCH]]:
; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[INDEX_I]], 1
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[CMP2]], label %[[INNERLOOP_SPLIT]], label %[[OUTERLOOP_HEADER]]
; CHECK:       [[EXIT_LOOPEXIT]]:
; CHECK-NEXT:    br label %[[EXIT]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    ret void
;
entry:
  %cmp = icmp sgt i64 %n, 0
  br i1 %cmp, label %outerloop_header, label %exit

outerloop_header:
  %index_i = phi i64 [ 0, %entry ], [ %index_i.next, %outerloop_latch ]
  %addr_s = getelementptr inbounds nuw double, ptr %s, i64 %index_i
  %invariant.gep.us = getelementptr inbounds nuw double, ptr %a, i64 %index_i
  %invariant.gep32.us = getelementptr inbounds nuw double, ptr %b, i64 %index_i
  br label %innerloop

innerloop:
  %index_j = phi i64 [ 0, %outerloop_header ], [ %index_j.next, %innerloop ]
  %reduction = phi double [ 0.000000e+00, %outerloop_header ], [ %add, %innerloop ]
  %addr_a_j_i = getelementptr inbounds nuw double, ptr %invariant.gep.us, i64 %index_j
  %0 = load double, ptr %addr_a_j_i, align 8
  %addr_b_j_i = getelementptr inbounds nuw double, ptr %invariant.gep32.us, i64 %index_j
  %1 = load double, ptr %addr_b_j_i, align 8
  %mul = fmul fast double %1, %0
  %add = fadd fast double %mul, %reduction
  %index_j.next = add nuw nsw i64 %index_j, 1
  %cond1 = icmp eq i64 %index_j.next, %n
  br i1 %cond1, label %outerloop_latch, label %innerloop

outerloop_latch:
  %lcssa = phi double [ %add, %innerloop ]
  store double %lcssa, ptr %addr_s, align 8
  %index_i.next = add nuw nsw i64 %index_i, 1
  %cond2 = icmp eq i64 %index_i.next, %n
  br i1 %cond2, label %exit, label %outerloop_header

exit:
  ret void
}