diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll index 6c4f504..c962c05 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-uniform-waterfall.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK +; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK ; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) { @@ -23,7 +24,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -75,7 +78,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrs ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -126,6 +131,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1) ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -175,6 +182,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspac ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -225,7 +234,9 @@ define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1) ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, 0 @@ -292,7 +303,9 @@ define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i3 ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[MYMASK]] @@ -359,7 +372,9 @@ define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(ptr addrspace ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] ; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]]) ; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[BALLOT]], 0 ; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 @@ -410,6 +425,8 @@ define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(ptr addrspace ; PASS-CHECK-NEXT: br label %[[WHILE:.*]] ; PASS-CHECK: [[WHILE]]: ; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ] +; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[DONE]]) +; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i32 0, [[BALLOT]] ; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]] ; PASS-CHECK: [[IF]]: ; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4 |
