diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll | 161 |
1 files changed, 155 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll index 4cbe682..004c279 100644 --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s declare void @llvm.trap() @@ -70,8 +70,33 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { ; CHECK-NEXT: s_mov_b64 s[2:3], -1 ; CHECK-NEXT: s_trap 2 ; CHECK-NEXT: s_branch .LBB0_4 - - +; UNIFY-LABEL: @kernel( +; UNIFY-NEXT: entry: +; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 +; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; UNIFY: if.then: +; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]] +; UNIFY: cond.false: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: unreachable +; UNIFY: if.else: +; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 +; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]] +; UNIFY: if.then3: +; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 +; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]] +; UNIFY: cond.false.i8: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: unreachable +; UNIFY: if.end6.sink.split: +; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] +; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 +; UNIFY-NEXT: br label [[IF_END6]] +; UNIFY: if.end6: +; UNIFY-NEXT: ret void +; entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cmp = icmp eq i32 %n, 256 @@ -105,5 +130,129 @@ if.end6.sink.split: if.end6: ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; UNIFY: {{.*}} + +define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { +; CHECK-LABEL: kernel_callbr: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10 +; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ; %bb.1: ; %if.then +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split +; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, s0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_store_dword v0, v1, s[2:3] +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target +; CHECK-NEXT: ; %UnifiedReturnBlock +; CHECK-NEXT: ; Label of block must be emitted +; CHECK-NEXT: s_endpgm +; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target +; CHECK-NEXT: ; %if.else +; CHECK-NEXT: ; Label of block must be emitted +; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: ; %bb.5: ; %if.then3 +; CHECK-NEXT: s_cmp_eq_u32 s0, 0 +; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 +; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] +; CHECK-NEXT: ;;#ASMSTART +; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: s_branch .LBB1_2 +; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target +; CHECK-NEXT: ; %cond.false.i8 +; CHECK-NEXT: ; Label of block must be emitted +; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target +; CHECK-NEXT: ; %cond.false +; CHECK-NEXT: ; Label of block must be emitted +; CHECK-NEXT: s_trap 2 +; CHECK-NEXT: ; divergent unreachable +; CHECK-NEXT: s_branch .LBB1_3 +; UNIFY-LABEL: @kernel_callbr( +; UNIFY-NEXT: entry: +; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 +; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32 +; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]]) +; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else] +; UNIFY: if.then: +; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32 +; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]]) +; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false] +; UNIFY: cond.false: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: unreachable +; UNIFY: if.else: +; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 +; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32 +; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]]) +; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6] +; UNIFY: if.then3: +; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 +; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32 +; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]]) +; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8] +; UNIFY: cond.false.i8: +; UNIFY-NEXT: call void @llvm.trap() +; UNIFY-NEXT: unreachable +; UNIFY: if.end6.sink.split: +; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] +; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 +; UNIFY-NEXT: callbr void asm "", ""() +; UNIFY-NEXT: to label [[IF_END6:%.*]] [] +; UNIFY: if.end6: +; UNIFY-NEXT: ret void +; +entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + %cmp = icmp eq i32 %n, 256 + %cmp32 = zext i1 %cmp to i32 + callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else] + +if.then: + %cmp1 = icmp eq i32 %a, 0 + %cmp1_32 = zext i1 %cmp1 to i32 + callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false] + +cond.false: + call void @llvm.trap() + unreachable + +if.else: + %cmp2 = icmp ult i32 %tid, 10 + %cmp2_32 = zext i1 %cmp2 to i32 + callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6] + +if.then3: + %cmp1.i7 = icmp eq i32 %a, 0 + %cmp1.i7_32 = zext i1 %cmp1.i7 to i32 + callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8] + +cond.false.i8: + call void @llvm.trap() + unreachable + +if.end6.sink.split: + %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid + store i32 %a, ptr addrspace(1) %x1, align 4 + callbr void asm "", ""() to label %if.end6 [] + +if.end6: + ret void +} |
