aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
blob: a5299ea36958ddcc4276e0b7cebf745624918127 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn < %s | FileCheck %s

; Check that this bugpoint-reduced test compiles without hitting an
; infinite loop in TLI.SimplifyDemandedBits() caused by failing
; to use the return value of TLO.DAG.UpdateNodeOperands().

; Check that code was generated: the autogenerated assertions below pin
; the full instruction sequence, which ends in an s_endpgm.

; Anonymous external array of 462 floats in address space 3. @foo stores to it
; in %bb21 and loads from it in %bb31.
@0 = external unnamed_addr addrspace(3) global [462 x float], align 4

; Intrinsics called by @foo; all three share attribute group #0.
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare float @llvm.fmuladd.f32(float, float, float) #0

; @foo: bugpoint-reduced kernel (see the note at the top of the file). The
; pointer and float arguments (%arg, %arg1, %arg2, %arg3) are never referenced
; in the body; %c0-%c5 are the branch conditions that shape the loop nest. The
; work-group size is fixed by !reqd_work_group_size !0.
define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
; CHECK-LABEL: foo:
; CHECK:       ; %bb.0: ; %bb
; CHECK-NEXT:    s_load_dword s6, s[4:5], 0x10
; CHECK-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
; CHECK-NEXT:    s_load_dword s10, s[4:5], 0x11
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; CHECK-NEXT:    s_movk_i32 s0, 0x54
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    v_mad_u32_u24 v1, v1, s0, v2
; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; CHECK-NEXT:    s_bitcmp1_b32 s6, 8
; CHECK-NEXT:    s_cselect_b64 s[0:1], -1, 0
; CHECK-NEXT:    s_bitcmp1_b32 s6, 16
; CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; CHECK-NEXT:    s_cselect_b64 s[4:5], -1, 0
; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v2
; CHECK-NEXT:    s_xor_b64 s[4:5], s[4:5], -1
; CHECK-NEXT:    s_bitcmp1_b32 s2, 24
; CHECK-NEXT:    s_cselect_b64 s[6:7], -1, 0
; CHECK-NEXT:    s_xor_b64 s[6:7], s[6:7], -1
; CHECK-NEXT:    s_bitcmp1_b32 s3, 0
; CHECK-NEXT:    s_cselect_b64 s[8:9], -1, 0
; CHECK-NEXT:    s_bitcmp1_b32 s10, 8
; CHECK-NEXT:    s_cselect_b64 s[10:11], -1, 0
; CHECK-NEXT:    s_and_b64 s[2:3], exec, s[6:7]
; CHECK-NEXT:    s_and_b64 s[4:5], exec, s[4:5]
; CHECK-NEXT:    s_and_b64 s[6:7], exec, s[10:11]
; CHECK-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; CHECK-NEXT:    s_mov_b32 m0, -1
; CHECK-NEXT:  .LBB0_1: ; %.loopexit145
; CHECK-NEXT:    ; =>This Loop Header: Depth=1
; CHECK-NEXT:    ; Child Loop BB0_3 Depth 2
; CHECK-NEXT:    ; Child Loop BB0_4 Depth 3
; CHECK-NEXT:    ; Child Loop BB0_5 Depth 2
; CHECK-NEXT:    v_mov_b32_e32 v2, v1
; CHECK-NEXT:    s_branch .LBB0_3
; CHECK-NEXT:  .LBB0_2: ; %.loopexit
; CHECK-NEXT:    ; in Loop: Header=BB0_3 Depth=2
; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 0x540, v2
; CHECK-NEXT:    s_mov_b64 vcc, s[4:5]
; CHECK-NEXT:    s_cbranch_vccnz .LBB0_5
; CHECK-NEXT:  .LBB0_3: ; %bb13
; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
; CHECK-NEXT:    ; => This Loop Header: Depth=2
; CHECK-NEXT:    ; Child Loop BB0_4 Depth 3
; CHECK-NEXT:    s_and_b64 vcc, exec, s[0:1]
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    s_cbranch_vccnz .LBB0_2
; CHECK-NEXT:  .LBB0_4: ; %bb21
; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
; CHECK-NEXT:    ; Parent Loop BB0_3 Depth=2
; CHECK-NEXT:    ; => This Inner Loop Header: Depth=3
; CHECK-NEXT:    ds_write_b32 v3, v0
; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 32, v3
; CHECK-NEXT:    s_mov_b64 vcc, s[2:3]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_4
; CHECK-NEXT:    s_branch .LBB0_2
; CHECK-NEXT:  .LBB0_5: ; %bb31
; CHECK-NEXT:    ; Parent Loop BB0_1 Depth=1
; CHECK-NEXT:    ; => This Inner Loop Header: Depth=2
; CHECK-NEXT:    s_mov_b64 vcc, s[6:7]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_5
; CHECK-NEXT:  ; %bb.6: ; %bb30
; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT:    s_mov_b64 vcc, s[8:9]
; CHECK-NEXT:    s_cbranch_vccz .LBB0_1
; CHECK-NEXT:  ; %bb.7: ; %bb11
; CHECK-NEXT:    s_endpgm
bb:
  ; Entry: read the work-item ids and derive the starting indices.
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
  %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp5 = and i32 %tmp, 15
  %tmp6 = mul nuw nsw i32 %tmp5, 21
  ; The sub/add-by-zero chain leaves the value unchanged (presumably residue of
  ; the bugpoint reduction); %tmp9 is the load index used on entry to %bb31.
  %tmp7 = sub i32 %tmp6, 0
  %tmp8 = add i32 %tmp7, 0
  %tmp9 = add i32 %tmp8, 0
  br label %bb12

bb11:                                             ; preds = %bb30
  ; Exit path: both successors end up at the `ret` in %bb38.
  br i1 %c0, label %bb37, label %bb38

bb12:                                             ; preds = %bb30, %bb
  ; Outermost loop header. The condition is the constant false, so control
  ; always continues to %.loopexit145 and never enters %.preheader from here.
  br i1 false, label %.preheader, label %.loopexit145

.loopexit145:                                     ; preds = %.preheader, %bb12
  br label %bb13

bb13:                                             ; preds = %.loopexit, %.loopexit145
  ; Middle loop over %tmp14: starts at %tmp5 and is advanced by 16 in
  ; %.loopexit. %c1 decides whether the store loop runs for this iteration.
  %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
  %tmp15 = add nsw i32 %tmp14, -3
  %tmp16 = mul i32 %tmp14, 21
  br i1 %c1, label %bb17, label %.loopexit

bb17:                                             ; preds = %bb13
  ; %tmp18 only feeds %tmp19, and %tmp19 has no users.
  %tmp18 = mul i32 %tmp15, 224
  %tmp19 = add i32 0, %tmp18
  br label %bb21

.loopexit:                                        ; preds = %bb21, %bb13
  ; Latch of the %bb13 loop: repeat while %c2 is true, otherwise go to %bb26.
  %tmp20 = add nuw nsw i32 %tmp14, 16
  br i1 %c2, label %bb13, label %bb26

bb21:                                             ; preds = %bb21, %bb17
  ; Store loop: writes 0.0 to element (%tmp22 + %tmp16) of @0. %tmp22 starts at
  ; the x work-item id and grows by 8; the loop repeats while %c3 is true.
  %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
  %tmp23 = add i32 %tmp22, %tmp16
  %tmp24 = getelementptr inbounds float, ptr addrspace(3) @0, i32 %tmp23
  store float 0.0, ptr addrspace(3) %tmp24, align 4
  %tmp25 = add nuw i32 %tmp22, 8
  br i1 %c3, label %bb21, label %.loopexit

bb26:                                             ; preds = %.loopexit
  br label %bb31

.preheader:                                       ; preds = %.preheader, %bb12
  ; Self-loop that counts %tmp27 up in steps of 128 from a poison start value.
  ; Its only entry is the constant-false branch in %bb12.
  %tmp27 = phi i32 [ %tmp28, %.preheader ], [ poison, %bb12 ]
  %tmp28 = add nuw i32 %tmp27, 128
  %tmp29 = icmp ult i32 %tmp28, 1568
  br i1 %tmp29, label %.preheader, label %.loopexit145

bb30:                                             ; preds = %bb31
  ; Latch of the outermost loop: %c4 true leaves to %bb11, false returns to
  ; %bb12.
  br i1 %c4, label %bb11, label %bb12

bb31:                                             ; preds = %bb31, %bb26
  ; Self-loop that repeats until %c5 is true. The load index is %tmp9 on entry
  ; from %bb26 and poison on the back edge; %tmp36, the end of the
  ; load/fmuladd chain, has no users.
  %tmp32 = phi i32 [ %tmp9, %bb26 ], [ poison, %bb31 ]
  %tmp33 = getelementptr inbounds [462 x float], ptr addrspace(3) @0, i32 0, i32 %tmp32
  %tmp34 = load float, ptr addrspace(3) %tmp33, align 4
  %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float poison, float poison)
  %tmp36 = tail call float @llvm.fmuladd.f32(float poison, float poison, float %tmp35)
  br i1 %c5, label %bb30, label %bb31

bb37:                                             ; preds = %bb11
  br label %bb38

bb38:                                             ; preds = %bb37, %bb11
  ret void
}

; Attribute group shared by the three intrinsic declarations above.
; `memory(none)` is the current spelling of the legacy `readnone` function
; attribute; the IR parser upgrades the old keyword to exactly this.
attributes #0 = { nounwind speculatable memory(none) }

; Operand of !reqd_work_group_size on @foo: work-group dimensions 8 x 16 x 1.
!0 = !{i32 8, i32 16, i32 1}