diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll | 70 |
1 files changed, 41 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index b4adf7f..827a01f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -113,9 +113,8 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB8_2 +; CHECK-NEXT: s_bitcmp0_b32 s0, 0 +; CHECK-NEXT: s_cbranch_scc1 .LBB8_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB8_3 @@ -159,15 +158,16 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_non_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_non_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_and_b32 s0, s0, 1 -; CHECK-NEXT: v_cmp_ne_u32_e64 vcc, s0, 0 -; CHECK-NEXT: s_cbranch_vccz .LBB10_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB10_3 -; CHECK-NEXT: .LBB10_2: ; %true +; CHECK-NEXT: s_bitcmp1_b32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB10_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB10_3 +; CHECK-NEXT: .LBB10_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB10_3 ; CHECK-NEXT: .LBB10_3: %c = trunc i32 %v to i1 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -204,8 +204,8 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB12_2 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 +; CHECK-NEXT: s_cbranch_scc1 .LBB12_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB12_3 @@ -248,14 +248,14 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_compare(i32 inreg %v) { ; CHECK-LABEL: branch_uniform_ballot_eq_zero_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_lt_u32_e64 vcc, s0, 12 -; CHECK-NEXT: s_cbranch_vccz .LBB14_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB14_3 -; CHECK-NEXT: .LBB14_2: ; %true +; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cbranch_scc1 .LBB14_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB14_3 +; CHECK-NEXT: .LBB14_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB14_3 ; CHECK-NEXT: .LBB14_3: %c = icmp ult i32 %v, 12 %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %c) @@ -296,13 +296,13 @@ false: define amdgpu_cs i32 @branch_uniform_ballot_ne_zero_and(i32 inreg %v1, i32 inreg %v2) { ; CHECK-LABEL: branch_uniform_ballot_ne_zero_and: ; CHECK: ; %bb.0: -; CHECK-NEXT: s_cmp_lt_u32 s0, 12 +; CHECK-NEXT: s_cmp_gt_u32 s0, 11 ; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: s_cmp_gt_u32 s1, 34 +; CHECK-NEXT: s_cmp_lt_u32 s1, 35 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 -; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec -; CHECK-NEXT: s_cbranch_scc0 .LBB16_2 +; CHECK-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB16_2 ; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB16_3 @@ -356,14 +356,14 @@ define amdgpu_cs i32 @branch_uniform_ballot_eq_zero_and(i32 inreg %v1, i32 inreg ; CHECK-NEXT: s_cmp_gt_u32 s1, 34 ; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0 ; CHECK-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1] -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], exec -; CHECK-NEXT: s_cbranch_scc0 .LBB18_2 -; CHECK-NEXT: ; %bb.1: ; %false -; CHECK-NEXT: s_mov_b32 s0, 33 -; CHECK-NEXT: s_branch .LBB18_3 -; CHECK-NEXT: .LBB18_2: ; %true +; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1] +; CHECK-NEXT: s_cbranch_vccnz .LBB18_2 +; CHECK-NEXT: ; %bb.1: ; %true ; CHECK-NEXT: s_mov_b32 s0, 42 ; CHECK-NEXT: s_branch .LBB18_3 +; CHECK-NEXT: .LBB18_2: ; %false +; CHECK-NEXT: s_mov_b32 s0, 33 +; CHECK-NEXT: s_branch .LBB18_3 ; CHECK-NEXT: .LBB18_3: %v1c = icmp ult i32 %v1, 12 %v2c = icmp ugt i32 %v2, 34 @@ -557,3 +557,15 @@ exit: store i64 %ballot, ptr addrspace(1) %out ret void } + +define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) { +; CHECK-LABEL: compare_bfloats: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = fcmp ogt bfloat %x, %y + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} |
