; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; Wave32 configurations (-mattr=+wavefrontsize32): the test is compiled once
; with GlobalISel (-global-isel=1) and once with SelectionDAG (-global-isel=0).
; Both share the GFX11 check prefix and additionally use their own prefix
; where the two selectors are expected to produce different code.
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11,GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11,SDAG %s

; Wave64 configurations (-mattr=+wavefrontsize64): selecting the i32 form of
; the intrinsic is expected to fail, so llc must exit with an error under
; GlobalISel and crash under SelectionDAG; only the diagnostic is checked.
; RUN: not llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -global-isel=1 < %s 2>&1 | FileCheck  -check-prefix=GISEL-ERR %s
; RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -global-isel=0 < %s 2>&1 | FileCheck  -check-prefix=SDAG-ERR %s

; GISEL-ERR: LLVM ERROR: cannot select: {{.*}}  = G_INTRINSIC intrinsic(@llvm.amdgcn.inverse.ballot)
; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.inverse.ballot

; Intrinsic under test, used here in its i32-mask form: i32 in, i1 out.
declare i1 @llvm.amdgcn.inverse.ballot(i32)

; Test inverse ballot of the constant mask 0.
; The checks expect the mask to be materialized into s0 and then used as the
; v_cndmask condition; both selectors share the same expected output.
define amdgpu_cs void @constant_false_inverse_ballot(ptr addrspace(1) %out) {
; GFX11-LABEL: constant_false_inverse_ballot:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_endpgm
entry:
  ; Constant all-zero mask.
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 0)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel    = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test inverse ballot of the all-ones constant mask (0xFFFFFFFF).
; The checks expect the mask to be materialized as -1 in s0 and then used as
; the v_cndmask condition.

define amdgpu_cs void @constant_true_inverse_ballot(ptr addrspace(1) %out) {
; GFX11-LABEL: constant_true_inverse_ballot:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_mov_b32 s0, -1
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_endpgm
entry:
  ; Every bit of the 32-bit mask is set.
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 u0xFFFFFFFF)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel    = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test inverse ballot of a constant mask with a single bit set (0x1000, i.e.
; bit 12). The checks expect the constant to be loaded into s0 with
; s_movk_i32 and then used as the v_cndmask condition.
define amdgpu_cs void @constant_mask_inverse_ballot(ptr addrspace(1) %out) {
; GFX11-LABEL: constant_mask_inverse_ballot:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_movk_i32 s0, 0x1000
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_endpgm
entry:
  ; Only bit 12 of the mask is set.
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 u0x00001000)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel    = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test inverse ballot using a vgpr as input.
; %input is a plain (non-inreg) argument and arrives in v0. The checks expect
; a v_readfirstlane_b32 to copy it into s0 before s0 is used as the v_cndmask
; condition.

define amdgpu_cs void @vgpr_inverse_ballot(i32 %input, ptr addrspace(1) %out) {
; GFX11-LABEL: vgpr_inverse_ballot:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[1:2], v0, off
; GFX11-NEXT:    s_endpgm
entry:
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 %input)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test inverse ballot using an sgpr as input.
; %input is an inreg argument and arrives in s0. The checks expect s0 to be
; used directly as the v_cndmask condition, with no extra copy.
define amdgpu_cs void @sgpr_inverse_ballot(i32 inreg %input, ptr addrspace(1) %out) {
; GFX11-LABEL: sgpr_inverse_ballot:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_endpgm
entry:
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 %input)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test inverse ballot whose mask comes from a phi.
; Both phi inputs derive from inreg arguments: the mask is %s0_1 when
; %s2 != 0, and %s0_1 + 1 otherwise. The checks expect a scalar compare and
; branch, a scalar add on the fall-through path, and s0 used as the v_cndmask
; condition after the join.
define amdgpu_cs void @phi_uniform(i32 inreg %s0_1, i32 inreg %s2, ptr addrspace(1) %out) {
; GFX11-LABEL: phi_uniform:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_cmp_lg_u32 s1, 0
; GFX11-NEXT:    s_cbranch_scc1 .LBB5_2
; GFX11-NEXT:  ; %bb.1: ; %if
; GFX11-NEXT:    s_add_i32 s0, s0, 1
; GFX11-NEXT:  .LBB5_2: ; %endif
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s0
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    s_endpgm
entry:
  ; Skip the increment when %s2 is non-zero.
  %cc = icmp ne i32 %s2, 0
  br i1 %cc, label %endif, label %if

if:
  %tmp = add i32 %s0_1, 1
  br label %endif

endif:
  ; Mask is the original value from %entry or the incremented value from %if.
  %input = phi i32 [ %s0_1, %entry ], [ %tmp, %if ]

  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 %input)
  ; Widen the i1 result to an i32 (1 or 0) so it can be stored to %out.
  %sel = select i1 %ballot, i32 1, i32 0
  store i32 %sel, ptr addrspace(1) %out
  ret void
}

; Test branching on the result of an inverse ballot.
; The inverse ballot of %s2 is used directly as the branch condition; the
; stored value is %s0_1 when the condition is true and %s0_1 + 1 otherwise.
;
; The GlobalISel implementation is currently incorrect: in its expected output
; the increment in the %if block is a scalar s_add_i32 on s0, and s0 is only
; copied to v2 after the join, so the change made in the branch affects all
; lanes, not just the branching ones. The SelectionDAG output copies s0 to v2
; before the branch and overwrites v2 inside the %if block instead.
; This test will be fixed once GISel correctly takes uniformity analysis into
; account.
define amdgpu_cs void @inverse_ballot_branch(i32 inreg %s0_1, i32 inreg %s2, ptr addrspace(1) %out) {
; GISEL-LABEL: inverse_ballot_branch:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_xor_b32 s2, s1, -1
; GISEL-NEXT:    s_and_saveexec_b32 s1, s2
; GISEL-NEXT:  ; %bb.1: ; %if
; GISEL-NEXT:    s_add_i32 s0, s0, 1
; GISEL-NEXT:  ; %bb.2: ; %endif
; GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GISEL-NEXT:    global_store_b32 v[0:1], v2, off
; GISEL-NEXT:    s_endpgm
;
; SDAG-LABEL: inverse_ballot_branch:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    v_mov_b32_e32 v2, s0
; SDAG-NEXT:    s_xor_b32 s2, s1, -1
; SDAG-NEXT:    s_and_saveexec_b32 s1, s2
; SDAG-NEXT:  ; %bb.1: ; %if
; SDAG-NEXT:    s_add_i32 s0, s0, 1
; SDAG-NEXT:    v_mov_b32_e32 v2, s0
; SDAG-NEXT:  ; %bb.2: ; %endif
; SDAG-NEXT:    s_or_b32 exec_lo, exec_lo, s1
; SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; SDAG-NEXT:    s_endpgm
entry:
  %ballot = call i1 @llvm.amdgcn.inverse.ballot(i32 %s2)
  ; A true condition skips the increment and goes straight to %endif.
  br i1 %ballot, label %endif, label %if

if:
  %tmp = add i32 %s0_1, 1
  br label %endif

endif:
  ; Original value from %entry, or the incremented value from %if.
  %input = phi i32 [ %s0_1, %entry ], [ %tmp, %if ]
  store i32 %input, ptr addrspace(1) %out
  ret void
}