; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck --check-prefix=VF4IC2 %s
; RUN: opt -p loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -S %s | FileCheck --check-prefix=VF8IC1 %s

; Test case from https://github.com/llvm/llvm-project/issues/153946.
; %shr and thus %early.cond will be poison from %iv == 4 onwards.
; Make sure the mask being poison does not propagate across lanes in the
; OR reduction when computing the early exit condition in the vector loop.
define noundef i32 @f(i32 noundef %g) {
; VF4IC2-LABEL: define noundef i32 @f(
; VF4IC2-SAME: i32 noundef [[G:%.*]]) {
; VF4IC2-NEXT:  [[ENTRY:.*:]]
; VF4IC2-NEXT:    br label %[[VECTOR_PH:.*]]
; VF4IC2:       [[VECTOR_PH]]:
; VF4IC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[G]], i64 0
; VF4IC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; VF4IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF4IC2:       [[VECTOR_BODY]]:
; VF4IC2-NEXT:    [[TMP2:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 8, i32 16, i32 24>
; VF4IC2-NEXT:    [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], <i32 32, i32 40, i32 48, i32 56>
; VF4IC2-NEXT:    [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
; VF4IC2-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
; VF4IC2-NEXT:    [[TMP17:%.*]] = freeze <4 x i1> [[TMP4]]
; VF4IC2-NEXT:    [[TMP18:%.*]] = freeze <4 x i1> [[TMP5]]
; VF4IC2-NEXT:    [[TMP6:%.*]] = or <4 x i1> [[TMP17]], [[TMP18]]
; VF4IC2-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; VF4IC2-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
; VF4IC2:       [[MIDDLE_SPLIT]]:
; VF4IC2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
; VF4IC2-NEXT:    br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF4IC2:       [[MIDDLE_BLOCK]]:
; VF4IC2-NEXT:    br label %[[RETURN:.*]]
; VF4IC2:       [[VECTOR_EARLY_EXIT]]:
; VF4IC2-NEXT:    [[TMP9:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
; VF4IC2-NEXT:    [[TMP10:%.*]] = add i64 4, [[TMP9]]
; VF4IC2-NEXT:    [[TMP11:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 true)
; VF4IC2-NEXT:    [[TMP12:%.*]] = add i64 0, [[TMP11]]
; VF4IC2-NEXT:    [[TMP13:%.*]] = icmp ne i64 [[TMP11]], 4
; VF4IC2-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 [[TMP10]]
; VF4IC2-NEXT:    [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
; VF4IC2-NEXT:    [[TMP16:%.*]] = add i32 0, [[TMP15]]
; VF4IC2-NEXT:    br label %[[RETURN]]
; VF4IC2:       [[SCALAR_PH:.*]]:
; VF4IC2-NEXT:    br label %[[LOOP_HEADER:.*]]
; VF4IC2:       [[LOOP_HEADER]]:
; VF4IC2-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF4IC2-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3
; VF4IC2-NEXT:    [[SHR:%.*]] = ashr i32 [[G]], [[MUL]]
; VF4IC2-NEXT:    [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0
; VF4IC2-NEXT:    br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]]
; VF4IC2:       [[LOOP_LATCH]]:
; VF4IC2-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; VF4IC2-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8
; VF4IC2-NEXT:    br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]]
; VF4IC2:       [[RETURN]]:
; VF4IC2-NEXT:    [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ [[TMP16]], %[[VECTOR_EARLY_EXIT]] ]
; VF4IC2-NEXT:    ret i32 [[RES]]
;
; VF8IC1-LABEL: define noundef i32 @f(
; VF8IC1-SAME: i32 noundef [[G:%.*]]) {
; VF8IC1-NEXT:  [[ENTRY:.*:]]
; VF8IC1-NEXT:    br label %[[VECTOR_PH:.*]]
; VF8IC1:       [[VECTOR_PH]]:
; VF8IC1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[G]], i64 0
; VF8IC1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
; VF8IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
; VF8IC1:       [[VECTOR_BODY]]:
; VF8IC1-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
; VF8IC1-NEXT:    [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
; VF8IC1-NEXT:    [[TMP8:%.*]] = freeze <8 x i1> [[TMP2]]
; VF8IC1-NEXT:    [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
; VF8IC1-NEXT:    br label %[[MIDDLE_SPLIT:.*]]
; VF8IC1:       [[MIDDLE_SPLIT]]:
; VF8IC1-NEXT:    [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7
; VF8IC1-NEXT:    br i1 [[TMP3]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
; VF8IC1:       [[MIDDLE_BLOCK]]:
; VF8IC1-NEXT:    br label %[[RETURN:.*]]
; VF8IC1:       [[VECTOR_EARLY_EXIT]]:
; VF8IC1-NEXT:    [[TMP5:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP2]], i1 true)
; VF8IC1-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; VF8IC1-NEXT:    [[TMP7:%.*]] = add i32 0, [[TMP6]]
; VF8IC1-NEXT:    br label %[[RETURN]]
; VF8IC1:       [[SCALAR_PH:.*]]:
; VF8IC1-NEXT:    br label %[[LOOP_HEADER:.*]]
; VF8IC1:       [[LOOP_HEADER]]:
; VF8IC1-NEXT:    [[IV:%.*]] = phi i32 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; VF8IC1-NEXT:    [[MUL:%.*]] = shl nuw nsw i32 [[IV]], 3
; VF8IC1-NEXT:    [[SHR:%.*]] = ashr i32 [[G]], [[MUL]]
; VF8IC1-NEXT:    [[EARLY_COND:%.*]] = icmp eq i32 [[SHR]], 0
; VF8IC1-NEXT:    br i1 [[EARLY_COND]], label %[[LOOP_LATCH]], label %[[RETURN]]
; VF8IC1:       [[LOOP_LATCH]]:
; VF8IC1-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; VF8IC1-NEXT:    [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 8
; VF8IC1-NEXT:    br i1 [[EC]], label %[[RETURN]], label %[[LOOP_HEADER]]
; VF8IC1:       [[RETURN]]:
; VF8IC1-NEXT:    [[RES:%.*]] = phi i32 [ [[SHR]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VECTOR_EARLY_EXIT]] ]
; VF8IC1-NEXT:    ret i32 [[RES]]
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; Shift amount is 8 * %iv. It reaches the i32 bit width (32) at %iv == 4,
  ; so %shr is poison from that iteration onwards.
  %mul = shl nuw nsw i32 %iv, 3
  %shr = ashr i32 %g, %mul
  ; Early exit: continue to the latch while %shr is zero, otherwise leave
  ; the loop directly from the header.
  %early.cond = icmp eq i32 %shr, 0
  br i1 %early.cond, label %loop.latch, label %return

loop.latch:
  ; Counted exit: the loop ends once %iv.next reaches 8.
  %iv.next = add nuw nsw i32 %iv, 1
  %ec = icmp eq i32 %iv.next, 8
  br i1 %ec, label %return, label %loop.header

return:
  ; Result is %shr when leaving through the latch (counted exit) and %iv
  ; when leaving through the early exit in the header.
  %res = phi i32 [ %shr, %loop.latch ], [ %iv, %loop.header ]
  ret i32 %res
}