aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
blob: 6819ec377ab17dd70e4db392113536f492020f7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
; RUN: opt -S -passes=loop-simplify,loop-fusion -debug-only=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
; REQUIRES: asserts

@B = common global [1024 x i32] zeroinitializer, align 16

; Check that the two candidates for fusion are placed into separate candidate
; sets because they are not control flow equivalent.

; CHECK: Performing Loop Fusion on function non_cfe
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK: bb
; CHECK: ****************************
; CHECK: *** Fusion Candidate Set ***
; CHECK: bb20.preheader
; CHECK: ****************************
; CHECK: Loop Fusion complete
define void @non_cfe(ptr noalias %arg, i32 %N) {
bb:
  br label %bb7

bb7:                                              ; preds = %bb, %bb14
  %.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
  %indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ]
  %tmp = add nsw i32 %.014, -3
  %tmp8 = add nuw nsw i64 %indvars.iv23, 3
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = mul nsw i32 %tmp, %tmp9
  %tmp11 = trunc i64 %indvars.iv23 to i32
  %tmp12 = srem i32 %tmp10, %tmp11
  %tmp13 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv23
  store i32 %tmp12, ptr %tmp13, align 4
  br label %bb14

bb14:                                             ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
  %tmp15 = add nuw nsw i32 %.014, 1
  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
  br i1 %exitcond4, label %bb7, label %bb34

bb34:
  %cmp = icmp slt i32 %N, 50
  br i1 %cmp, label %bb16, label %bb33

bb16:                                             ; preds = %bb34
  %tmp17 = load i32, ptr %arg, align 4
  %tmp18 = icmp slt i32 %tmp17, 0
  br i1 %tmp18, label %bb20.preheader, label %bb33

bb20.preheader:                                   ; preds = %bb16
  br label %bb22

bb22:                                             ; preds = %bb20.preheader, %bb30
  %.02 = phi i32 [ 0, %bb20.preheader ], [ %tmp31, %bb30 ]
  %indvars.iv1 = phi i64 [ 0, %bb20.preheader ], [ %indvars.iv.next, %bb30 ]
  %tmp23 = add nsw i32 %.02, -3
  %tmp24 = add nuw nsw i64 %indvars.iv1, 3
  %tmp25 = trunc i64 %tmp24 to i32
  %tmp26 = mul nsw i32 %tmp23, %tmp25
  %tmp27 = trunc i64 %indvars.iv1 to i32
  %tmp28 = srem i32 %tmp26, %tmp27
  %tmp29 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp28, ptr %tmp29, align 4
  br label %bb30

bb30:                                             ; preds = %bb22
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %tmp31 = add nuw nsw i32 %.02, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb22, label %bb33.loopexit

bb33.loopexit:                                    ; preds = %bb30
  br label %bb33

bb33:                                             ; preds = %bb33.loopexit, %bb16, %bb34
  ret void
}

; Check that fusion detects the two canddates are not adjacent (the exit block
; of the first candidate is not the preheader of the second candidate).

; CHECK: Performing Loop Fusion on function non_adjacent
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Fusion candidates are not adjacent. Not fusing.
; CHECK: Loop Fusion complete
define void @non_adjacent(ptr noalias %arg) {
bb:
  br label %bb5

bb4:                                              ; preds = %bb11
  br label %bb13

bb5:                                              ; preds = %bb, %bb11
  %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
  %tmp = add nsw i64 %.013, -3
  %tmp6 = add nuw nsw i64 %.013, 3
  %tmp7 = mul nsw i64 %tmp, %tmp6
  %tmp8 = srem i64 %tmp7, %.013
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %.013
  store i32 %tmp9, ptr %tmp10, align 4
  br label %bb11

bb11:                                             ; preds = %bb5
  %tmp12 = add nuw nsw i64 %.013, 1
  %exitcond2 = icmp ne i64 %tmp12, 100
  br i1 %exitcond2, label %bb5, label %bb4

bb13:                                             ; preds = %bb4
  br label %bb16

bb15:                                             ; preds = %bb23
  br label %bb25

bb16:                                             ; preds = %bb13, %bb23
  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
  %tmp17 = add nsw i64 %.02, -3
  %tmp18 = add nuw nsw i64 %.02, 3
  %tmp19 = mul nsw i64 %tmp17, %tmp18
  %tmp20 = srem i64 %tmp19, %.02
  %tmp21 = trunc i64 %tmp20 to i32
  %tmp22 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %.02
  store i32 %tmp21, ptr %tmp22, align 4
  br label %bb23

bb23:                                             ; preds = %bb16
  %tmp24 = add nuw nsw i64 %.02, 1
  %exitcond = icmp ne i64 %tmp24, 100
  br i1 %exitcond, label %bb16, label %bb15

bb25:                                             ; preds = %bb15
  ret void
}

; Check that the different bounds are detected and prevent fusion.

; CHECK: Performing Loop Fusion on function different_bounds
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Fusion candidates do not have identical trip counts. Not fusing.
; CHECK: Loop Fusion complete
define void @different_bounds(ptr noalias %arg) {
bb:
  br label %bb5

bb4:                                              ; preds = %bb11
  br label %bb13

bb5:                                              ; preds = %bb, %bb11
  %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
  %tmp = add nsw i64 %.013, -3
  %tmp6 = add nuw nsw i64 %.013, 3
  %tmp7 = mul nsw i64 %tmp, %tmp6
  %tmp8 = srem i64 %tmp7, %.013
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = getelementptr inbounds i32, ptr %arg, i64 %.013
  store i32 %tmp9, ptr %tmp10, align 4
  br label %bb11

bb11:                                             ; preds = %bb5
  %tmp12 = add nuw nsw i64 %.013, 1
  %exitcond2 = icmp ne i64 %tmp12, 100
  br i1 %exitcond2, label %bb5, label %bb4

bb13:                                             ; preds = %bb4
  br label %bb16

bb15:                                             ; preds = %bb23
  br label %bb25

bb16:                                             ; preds = %bb13, %bb23
  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
  %tmp17 = add nsw i64 %.02, -3
  %tmp18 = add nuw nsw i64 %.02, 3
  %tmp19 = mul nsw i64 %tmp17, %tmp18
  %tmp20 = srem i64 %tmp19, %.02
  %tmp21 = trunc i64 %tmp20 to i32
  %tmp22 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %.02
  store i32 %tmp21, ptr %tmp22, align 4
  br label %bb23

bb23:                                             ; preds = %bb16
  %tmp24 = add nuw nsw i64 %.02, 1
  %exitcond = icmp ne i64 %tmp24, 200
  br i1 %exitcond, label %bb16, label %bb15

bb25:                                             ; preds = %bb15
  ret void
}

; Check that the negative dependence between the two candidates is identified
; and prevents fusion.

; CHECK: Performing Loop Fusion on function negative_dependence
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define void @negative_dependence(ptr noalias %arg) {
bb:
  br label %bb7

bb11.preheader:                                   ; preds = %bb9
  br label %bb13

bb7:                                              ; preds = %bb, %bb9
  %indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ]
  %tmp = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv22
  %tmp8 = trunc i64 %indvars.iv22 to i32
  store i32 %tmp8, ptr %tmp, align 4
  br label %bb9

bb9:                                              ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1
  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
  br i1 %exitcond4, label %bb7, label %bb11.preheader

bb13:                                             ; preds = %bb11.preheader, %bb18
  %indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ]
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %tmp14 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv.next
  %tmp15 = load i32, ptr %tmp14, align 4
  %tmp16 = shl nsw i32 %tmp15, 1
  %tmp17 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp16, ptr %tmp17, align 4
  br label %bb18

bb18:                                             ; preds = %bb13
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb13, label %bb19

bb19:                                             ; preds = %bb18
  ret void
}

; Check for values defined in Loop 0 and used in Loop 1.
; It is not safe to fuse in this case, because the second loop has
; a use of %.01.lcssa which is defined in the body of loop 0. The
; first loop must execute completely in order to compute the correct
; value of %.01.lcssa to be used in the second loop.

; CHECK: Performing Loop Fusion on function sumTest
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define i32 @sumTest(ptr noalias %arg) {
bb:
  br label %bb9

bb13.preheader:                                   ; preds = %bb9
  br label %bb15

bb9:                                              ; preds = %bb, %bb9
  %.01.lcssa = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
  %.013 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ]
  %indvars.iv32 = phi i64 [ 0, %bb ], [ %indvars.iv.next4, %bb9 ]
  %tmp = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv32
  %tmp10 = load i32, ptr %tmp, align 4
  %tmp11 = add nsw i32 %.013, %tmp10
  %indvars.iv.next4 = add nuw nsw i64 %indvars.iv32, 1
  %exitcond5 = icmp ne i64 %indvars.iv.next4, 100
  br i1 %exitcond5, label %bb9, label %bb13.preheader

bb14:                                             ; preds = %bb20
  br label %bb21

bb15:                                             ; preds = %bb13.preheader, %bb20
  %indvars.iv1 = phi i64 [ 0, %bb13.preheader ], [ %indvars.iv.next, %bb20 ]
  %tmp16 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv1
  %tmp17 = load i32, ptr %tmp16, align 4
  %tmp18 = sdiv i32 %tmp17, %.01.lcssa
  %tmp19 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv1
  store i32 %tmp18, ptr %tmp19, align 4
  br label %bb20

bb20:                                             ; preds = %bb15
  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
  %exitcond = icmp ne i64 %indvars.iv.next, 100
  br i1 %exitcond, label %bb15, label %bb14

bb21:                                             ; preds = %bb14
  ret i32 %.01.lcssa
}

; Similar to sumTest above. The first loop computes %add and must
; complete before it is used in the second loop. Thus, these two loops
; also cannot be fused.

; CHECK: Performing Loop Fusion on function test
; CHECK: Fusion Candidates:
; CHECK: *** Fusion Candidate Set ***
; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: ****************************
; CHECK: Attempting fusion on Candidate Set:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
; CHECK: Loop Fusion complete
define float @test(ptr nocapture %a, i32 %n) {
entry:
  %conv = zext i32 %n to i64
  %cmp32 = icmp eq i32 %n, 0
  br i1 %cmp32, label %for.cond.cleanup7, label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.034 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %sum1.033 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
  %idxprom = trunc i64 %i.034 to i32
  %arrayidx = getelementptr inbounds float, ptr %a, i32 %idxprom
  %0 = load float, ptr %arrayidx, align 4
  %add = fadd float %sum1.033, %0
  %inc = add nuw nsw i64 %i.034, 1
  %cmp = icmp ult i64 %inc, %conv
  br i1 %cmp, label %for.body, label %for.body8

for.body8:                                        ; preds = %for.body, %for.body8
  %i2.031 = phi i64 [ %inc14, %for.body8 ], [ 0, %for.body ]
  %idxprom9 = trunc i64 %i2.031 to i32
  %arrayidx10 = getelementptr inbounds float, ptr %a, i32 %idxprom9
  %1 = load float, ptr %arrayidx10, align 4
  %div = fdiv float %1, %add
  store float %div, ptr %arrayidx10, align 4
  %inc14 = add nuw nsw i64 %i2.031, 1
  %cmp5 = icmp ult i64 %inc14, %conv
  br i1 %cmp5, label %for.body8, label %for.cond.cleanup7

for.cond.cleanup7:                                ; preds = %for.body8, %entry
  %sum1.0.lcssa36 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body8 ]
  ret float %sum1.0.lcssa36
}

; Check that non-rotated loops are not considered for fusion.
; CHECK: Performing Loop Fusion on function notRotated
; CHECK: Loop bb{{.*}} is not rotated!
; CHECK: Loop bb{{.*}} is not rotated!
define void @notRotated(ptr noalias %arg) {
bb:
  br label %bb5

bb5:                                              ; preds = %bb14, %bb
  %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ]
  %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ]
  %exitcond4 = icmp ne i64 %indvars.iv2, 100
  br i1 %exitcond4, label %bb7, label %bb17

bb7:                                              ; preds = %bb5
  %tmp = add nsw i32 %.01, -3
  %tmp8 = add nuw nsw i64 %indvars.iv2, 3
  %tmp9 = trunc i64 %tmp8 to i32
  %tmp10 = mul nsw i32 %tmp, %tmp9
  %tmp11 = trunc i64 %indvars.iv2 to i32
  %tmp12 = srem i32 %tmp10, %tmp11
  %tmp13 = getelementptr inbounds i32, ptr %arg, i64 %indvars.iv2
  store i32 %tmp12, ptr %tmp13, align 4
  br label %bb14

bb14:                                             ; preds = %bb7
  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1
  %tmp15 = add nuw nsw i32 %.01, 1
  br label %bb5

bb17:                                             ; preds = %bb27, %bb5
  %indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ]
  %.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ]
  %exitcond = icmp ne i64 %indvars.iv, 100
  br i1 %exitcond, label %bb19, label %bb18

bb18:                                             ; preds = %bb17
  br label %bb29

bb19:                                             ; preds = %bb17
  %tmp20 = add nsw i32 %.0, -3
  %tmp21 = add nuw nsw i64 %indvars.iv, 3
  %tmp22 = trunc i64 %tmp21 to i32
  %tmp23 = mul nsw i32 %tmp20, %tmp22
  %tmp24 = trunc i64 %indvars.iv to i32
  %tmp25 = srem i32 %tmp23, %tmp24
  %tmp26 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %indvars.iv
  store i32 %tmp25, ptr %tmp26, align 4
  br label %bb27

bb27:                                             ; preds = %bb19
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %tmp28 = add nuw nsw i32 %.0, 1
  br label %bb17

bb29:                                             ; preds = %bb18
  ret void
}