1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; NOTE: Tests that loop-interchange supports a reduction carried by the inner loop.
; RUN: opt < %s -passes="loop-interchange" -loop-interchange-reduction-to-mem -loop-interchange-profitabilities=ignore -S | FileCheck %s
; C-like equivalent of the loop nest in the function below:
;   for (int i = 0; i < n; i++) {
;     r = 0;
;     for (int j = 0; j < n; j++)
;       r = r + a[j][i] * b[j][i];
;     s[i] = r;
;   }
; @func: two-level loop nest with a floating-point reduction in the inner loop.
;
; Input shape (see the IR body after the expectations):
;   - entry guards the nest with n > 0.
;   - The outer loop (index_i) computes per-iteration base pointers into
;     %s, %a and %b.
;   - The inner loop (index_j) accumulates the product of the two loaded
;     doubles into %reduction, which starts at 0.0 on every outer iteration.
;   - The outer latch stores the final sum through %addr_s.
;
; Expected shape after interchange (the assertions below): the j loop becomes
; the outermost loop and the reduction is carried through memory at the
; %addr_s location instead of through a phi. A "first iteration" flag phi is
; true only when the j loop is entered from its preheader. On that first
; iteration the accumulator must be the original initial value 0.0; on every
; later iteration it must be the partial sum previously stored to memory.
; Hence the select picks 0.0 when the flag is true and the loaded value
; otherwise.
define void @func(ptr noalias readonly %a, ptr noalias readonly %b, ptr noalias writeonly %s, i64 %n) {
; CHECK-LABEL: define void @func(
; CHECK-SAME: ptr noalias readonly [[A:%.*]], ptr noalias readonly [[B:%.*]], ptr noalias writeonly [[S:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[INNERLOOP_PREHEADER:.*]], label %[[EXIT:.*]]
; CHECK: [[OUTERLOOPHEADER_PREHEADER:.*]]:
; CHECK-NEXT: br label %[[OUTERLOOP_HEADER:.*]]
; CHECK: [[OUTERLOOP_HEADER]]:
; CHECK-NEXT: [[INDEX_I:%.*]] = phi i64 [ [[I_NEXT:%.*]], %[[OUTERLOOP_LATCH:.*]] ], [ 0, %[[OUTERLOOPHEADER_PREHEADER]] ]
; CHECK-NEXT: [[ADDR_S:%.*]] = getelementptr inbounds nuw double, ptr [[S]], i64 [[INDEX_I]]
; CHECK-NEXT: [[ADDR_A:%.*]] = getelementptr inbounds nuw double, ptr [[A]], i64 [[INDEX_I]]
; CHECK-NEXT: [[ADDR_B:%.*]] = getelementptr inbounds nuw double, ptr [[B]], i64 [[INDEX_I]]
; CHECK-NEXT: br label %[[INNERLOOP_SPLIT1:.*]]
; CHECK: [[INNERLOOP_PREHEADER]]:
; CHECK-NEXT: br label %[[INNERLOOP:.*]]
; CHECK: [[INNERLOOP]]:
; CHECK-NEXT: [[INDEX_J:%.*]] = phi i64 [ [[J_NEXT:%.*]], %[[INNERLOOP_SPLIT:.*]] ], [ 0, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT: [[DEAD_REDUCTION:%.*]] = phi double [ [[ADD_LCSSA:%.*]], %[[INNERLOOP_SPLIT]] ], [ 0.000000e+00, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT: [[FIRSTITER:%.*]] = phi i1 [ false, %[[INNERLOOP_SPLIT]] ], [ true, %[[INNERLOOP_PREHEADER]] ]
; CHECK-NEXT: br label %[[OUTERLOOPHEADER_PREHEADER]]
; CHECK: [[INNERLOOP_SPLIT1]]:
; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ADDR_S]], align 8
; CHECK-NEXT: [[NEW_VAR:%.*]] = select i1 [[FIRSTITER]], double 0.000000e+00, double [[TMP0]]
; CHECK-NEXT: [[ADDR_A_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_A]], i64 [[INDEX_J]]
; CHECK-NEXT: [[A_J_I:%.*]] = load double, ptr [[ADDR_A_J_I]], align 8
; CHECK-NEXT: [[ADDR_B_J_I:%.*]] = getelementptr inbounds nuw double, ptr [[ADDR_B]], i64 [[INDEX_J]]
; CHECK-NEXT: [[B_J_I:%.*]] = load double, ptr [[ADDR_B_J_I]], align 8
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[B_J_I]], [[A_J_I]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[MUL]], [[NEW_VAR]]
; CHECK-NEXT: store double [[ADD]], ptr [[ADDR_S]], align 8
; CHECK-NEXT: [[DEAD_J_NEXT:%.*]] = add nuw nsw i64 [[INDEX_J]], 1
; CHECK-NEXT: [[DEAD_COND:%.*]] = icmp eq i64 [[DEAD_J_NEXT]], [[N]]
; CHECK-NEXT: br label %[[OUTERLOOP_LATCH]]
; CHECK: [[INNERLOOP_SPLIT]]:
; CHECK-NEXT: [[ADD_LCSSA]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ]
; CHECK-NEXT: [[DEAD_LCSSA:%.*]] = phi double [ [[ADD]], %[[OUTERLOOP_LATCH]] ]
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[INDEX_J]], 1
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[J_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT_LOOPEXIT:.*]], label %[[INNERLOOP]]
; CHECK: [[OUTERLOOP_LATCH]]:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[INDEX_I]], 1
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP2]], label %[[INNERLOOP_SPLIT]], label %[[OUTERLOOP_HEADER]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
  ; Skip the whole nest unless n > 0 (signed compare).
  %cmp = icmp sgt i64 %n, 0
  br i1 %cmp, label %outerloop_header, label %exit

outerloop_header:
  %index_i = phi i64 [ 0, %entry ], [ %index_i.next, %outerloop_latch ]
  ; Addresses that depend only on index_i, computed once per outer iteration.
  %addr_s = getelementptr inbounds nuw double, ptr %s, i64 %index_i
  %invariant.gep.us = getelementptr inbounds nuw double, ptr %a, i64 %index_i
  %invariant.gep32.us = getelementptr inbounds nuw double, ptr %b, i64 %index_i
  br label %innerloop

innerloop:
  %index_j = phi i64 [ 0, %outerloop_header ], [ %index_j.next, %innerloop ]
  ; Accumulator: restarts at 0.0 whenever the inner loop is entered from the
  ; outer header, otherwise carries the running sum %add.
  %reduction = phi double [ 0.000000e+00, %outerloop_header ], [ %add, %innerloop ]
  ; %0 and %1 are the elements of %a and %b at offset index_i + index_j.
  %addr_a_j_i = getelementptr inbounds nuw double, ptr %invariant.gep.us, i64 %index_j
  %0 = load double, ptr %addr_a_j_i, align 8
  %addr_b_j_i = getelementptr inbounds nuw double, ptr %invariant.gep32.us, i64 %index_j
  %1 = load double, ptr %addr_b_j_i, align 8
  %mul = fmul fast double %1, %0
  %add = fadd fast double %mul, %reduction
  %index_j.next = add nuw nsw i64 %index_j, 1
  %cond1 = icmp eq i64 %index_j.next, %n
  br i1 %cond1, label %outerloop_latch, label %innerloop

outerloop_latch:
  ; Single-entry phi exposing the final inner-loop sum outside the inner loop;
  ; this is the value written to the %addr_s slot.
  %lcssa = phi double [ %add, %innerloop ]
  store double %lcssa, ptr %addr_s, align 8
  %index_i.next = add nuw nsw i64 %index_i, 1
  %cond2 = icmp eq i64 %index_i.next, %n
  br i1 %cond2, label %exit, label %outerloop_header

exit:
  ret void
}
|