aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/structurize-hoist.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/structurize-hoist.ll180
1 files changed, 180 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
new file mode 100644
index 0000000..42436a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
+
+
+%pair = type { i32, i32 }
+
+define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_then_else:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB0_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB0_2: ; %Flow
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
+; GFX900-LABEL: test_extractvalue_else_then:
+; GFX900: ; %bb.0: ; %if
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_load_dword v3, v[0:1]
+; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
+; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
+; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB1_2
+; GFX900-NEXT: ; %bb.1: ; %else
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
+; GFX900-NEXT: .LBB1_2: ; %merge
+; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: flat_store_dword v[0:1], v3
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+if:
+ %load_then = load %pair, ptr %ptr
+ br i1 %cond, label %else, label %then
+
+else:
+ %a_else = extractvalue %pair %load_then, 0
+ %sum_else = add i32 %a_else, 1
+ br label %merge
+
+then:
+ %a_then = extractvalue %pair %load_then, 0
+ br label %merge
+
+merge:
+ %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
+ store i32 %phi, ptr %ptr
+ ret void
+}
+
+define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
+; GFX900-LABEL: test_loop_with_if:
+; GFX900: ; %bb.0: ; %entry
+; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
+; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX900-NEXT: v_mov_b32_e32 v5, 0
+; GFX900-NEXT: s_mov_b64 s[4:5], 0
+; GFX900-NEXT: s_movk_i32 s10, 0xfe
+; GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; GFX900-NEXT: s_bitcmp1_b32 s2, 0
+; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
+; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
+; GFX900-NEXT: v_mov_b32_e32 v2, s1
+; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
+; GFX900-NEXT: v_mov_b32_e32 v1, s0
+; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
+; GFX900-NEXT: s_branch .LBB2_2
+; GFX900-NEXT: .LBB2_1: ; %latch
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
+; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
+; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX900-NEXT: flat_store_dword v[1:2], v3
+; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GFX900-NEXT: s_cbranch_execz .LBB2_8
+; GFX900-NEXT: .LBB2_2: ; %loop
+; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
+; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
+; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GFX900-NEXT: s_mov_b64 s[6:7], 0
+; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
+; GFX900-NEXT: ; %bb.3: ; %if
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
+; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
+; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
+; GFX900-NEXT: s_mov_b64 s[6:7], -1
+; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
+; GFX900-NEXT: .LBB2_4: ; %Flow
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
+; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
+; GFX900-NEXT: s_cbranch_execz .LBB2_6
+; GFX900-NEXT: ; %bb.5: ; %else
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
+; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
+; GFX900-NEXT: .LBB2_6: ; %Flow1
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
+; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
+; GFX900-NEXT: s_cbranch_execz .LBB2_1
+; GFX900-NEXT: ; %bb.7: ; %then
+; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
+; GFX900-NEXT: flat_store_dword v[1:2], v0
+; GFX900-NEXT: s_branch .LBB2_1
+; GFX900-NEXT: .LBB2_8: ; %end
+; GFX900-NEXT: s_endpgm
+entry:
+ %a = tail call i32 @llvm.amdgcn.workitem.id.x()
+ br label %loop
+
+loop:
+ %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
+ %load = load %pair, ptr %ptr
+ br i1 %cond, label %if, label %else
+
+if:
+ %cmp = icmp sgt i32 %entry_phi, 10
+ br i1 %cmp, label %then, label %else
+
+then:
+ %a_then = extractvalue %pair %load, 0
+ store i32 %a, ptr %ptr, align 4
+ br label %latch
+
+else:
+ %a2 = extractvalue %pair %load, 1
+ %y = extractvalue %pair %load, 0
+ %a_else = add i32 %y, %a2
+ br label %latch
+
+latch:
+ %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
+ store i32 %a_test, ptr %ptr
+ %a15 = add nsw i32 %a_test, 20
+ %a16 = icmp slt i32 %a15, 255
+ br i1 %a16, label %loop, label %end
+
+end:
+ ret void
+}