author    Jay Foad <jay.foad@amd.com>  2022-01-31 14:55:36 +0000
committer Jay Foad <jay.foad@amd.com>  2022-01-31 14:55:36 +0000
commit    8faad296347a2c6d0567f28b307bfc40acc26e07
tree      7ca33eb5fd7731b06ed3e035886d41fbd9fc8d58
parent    002b944dfa3d588b05f967d6b55e6c36ce97d4e5
Revert "[Local] invertCondition: try modifying an existing ICmpInst"
This reverts commit a6b54ddaba2d5dc0f72dcc4591c92b9544eb0016.
Apparently it is not safe to modify the condition even if it passes the
hasOneUse test, because StructurizeCFG might have other references to
the condition that are not manifest in the IR use-def chains.
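For context, the reverted change taught invertCondition to flip a single-use compare in place instead of emitting new IR. Below is a minimal sketch of that removed fast path, with the hazard spelled out in comments; the wrapper function name is mine, and the StructurizeCFG detail comes from the commit message above rather than from this code:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Sketch of the reverted "fourth option" in llvm::invertCondition().
// Flipping the predicate mutates the existing instruction and returns
// the SAME Value*, now with the opposite meaning.
static Value *invertInPlace(Value *Condition) {
  if (Condition->hasOneUse())
    if (auto *CI = dyn_cast<CmpInst>(Condition)) {
      CI->setPredicate(CI->getInversePredicate());
      return Condition;
    }
  return nullptr; // no fast path; callers fall back to creating new IR
}
// Hazard: hasOneUse() only counts IR use-def edges. Per the commit
// message, StructurizeCFG also holds the condition Value* in its own
// side tables; those references are invisible to hasOneUse(), so the
// in-place flip silently inverted the condition for them as well.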
23 files changed, 164 insertions, 158 deletions
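Most of the test churn below is mechanical fallout. With the fast path gone, invertCondition always reaches its remaining "last option", which leaves the original compare untouched and builds an explicit negation named after it; that is why the FileCheck patterns change from matching an inverted predicate (icmp sge instead of icmp slt) to matching the original compare plus a fresh %<name>.inv value. A simplified sketch of that surviving path, assuming the usual LLVM headers (insertion-point handling elided):

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Remaining path of llvm::invertCondition(), simplified: create a
// brand-new NOT instruction. CreateNot prints as "xor i1 %cond, true",
// and the ".inv" suffix is what the updated checks below look for.
Value *invertByCreatingNot(Value *Condition) {
  auto *Inverted =
      BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv");
  if (auto *I = dyn_cast<Instruction>(Condition))
    Inverted->insertAfter(I); // simplified; the real code places it carefully
  return Inverted;
}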
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index c457931..9f33d2f 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3343,14 +3343,6 @@ Value *llvm::invertCondition(Value *Condition) {
     if (I->getParent() == Parent && match(I, m_Not(m_Specific(Condition))))
       return I;
 
-  // Fourth: Modify an existing instruction
-  if (Condition->hasOneUse()) {
-    if (auto *CI = dyn_cast<CmpInst>(Condition)) {
-      CI->setPredicate(CI->getInversePredicate());
-      return Condition;
-    }
-  }
-
   // Last option: Create a new instruction
   auto *Inverted =
       BinaryOperator::CreateNot(Condition, Condition->getName() + ".inv");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 746f259..3e8d1ec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -139,10 +139,8 @@ define void @constrained_if_register_class() {
 ; CHECK-NEXT: s_load_dword s4, s[4:5], 0x0
 ; CHECK-NEXT: s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
-; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
-; CHECK-NEXT: .LBB4_1: ; %bb12
-; CHECK-NEXT: s_setpc_b64 s[30:31]
-; CHECK-NEXT: .LBB4_2: ; %bb2
+; CHECK-NEXT: s_cbranch_scc1 .LBB4_4
+; CHECK-NEXT: ; %bb.1: ; %bb2
 ; CHECK-NEXT: s_getpc_b64 s[4:5]
 ; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
 ; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
@@ -155,13 +153,15 @@ define void @constrained_if_register_class() {
 ; CHECK-NEXT: s_mov_b32 s4, -1
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
 ; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
-; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
-; CHECK-NEXT: ; %bb.3: ; %bb7
+; CHECK-NEXT: s_cbranch_vccnz .LBB4_3
+; CHECK-NEXT: ; %bb.2: ; %bb7
 ; CHECK-NEXT: s_mov_b32 s4, 0
-; CHECK-NEXT: .LBB4_4: ; %bb8
+; CHECK-NEXT: .LBB4_3: ; %bb8
 ; CHECK-NEXT: s_cmp_lg_u32 s4, 0
-; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
-; CHECK-NEXT: ; %bb.5: ; %bb11
+; CHECK-NEXT: s_cbranch_scc0 .LBB4_5
+; CHECK-NEXT: .LBB4_4: ; %bb12
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+; CHECK-NEXT: .LBB4_5: ; %bb11
 ; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
 ; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
 ; CHECK-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 0b1105f..73416db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -838,7 +838,7 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow1
+; CGP-NEXT: .LBB2_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3118,7 +3118,7 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT: ; implicit-def: $vgpr8
-; CGP-NEXT: .LBB8_2: ; %Flow1
+; CGP-NEXT: .LBB8_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 1e95103..5e60c7c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -824,7 +824,7 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow1
+; CGP-NEXT: .LBB2_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -3072,7 +3072,7 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v2, v4, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT: ; implicit-def: $vgpr8
-; CGP-NEXT: .LBB8_2: ; %Flow1
+; CGP-NEXT: .LBB8_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 2ba189c..bf3c080 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -759,7 +759,7 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow1
+; CGP-NEXT: .LBB2_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -1641,7 +1641,7 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT: ; implicit-def: $vgpr8
-; CGP-NEXT: .LBB8_2: ; %Flow1
+; CGP-NEXT: .LBB8_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index e616322..97806c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -750,7 +750,7 @@ define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr4
 ; CGP-NEXT: ; implicit-def: $vgpr10
-; CGP-NEXT: .LBB2_2: ; %Flow1
+; CGP-NEXT: .LBB2_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
 ; CGP-NEXT: s_cbranch_execz .LBB2_4
@@ -2181,7 +2181,7 @@ define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
 ; CGP-NEXT: ; implicit-def: $vgpr8
-; CGP-NEXT: .LBB8_2: ; %Flow1
+; CGP-NEXT: .LBB8_2: ; %Flow2
 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index c1cb51e..c8c8911 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -227,31 +227,30 @@ bb3:
 
 ; GCN-LABEL: {{^}}uniform_unconditional_min_long_forward_branch:
 ; GCN: s_cmp_eq_u32
-; GCN: s_cbranch_scc{{[0-1]}} [[BB1:.LBB[0-9]+_[0-9]+]]
+; GCN: s_cbranch_scc{{[0-1]}} [[BB2:.LBB[0-9]+_[0-9]+]]
 ; GCN-NEXT: {{.LBB[0-9]+_[0-9]+}}: ; %bb0
 ; GCN-NEXT: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}}
 ; GCN-NEXT: [[POST_GETPC:.Lpost_getpc[0-9]+]]:{{$}}
-; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB4:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
-; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB4]]-[[POST_GETPC]])>>32
+; GCN-NEXT: s_add_u32 s[[PC0_LO]], s[[PC0_LO]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])&4294967295
+; GCN-NEXT: s_addc_u32 s[[PC0_HI]], s[[PC0_HI]], ([[BB3:.LBB[0-9]_[0-9]+]]-[[POST_GETPC]])>>32
 ; GCN-NEXT: s_setpc_b64 s{{\[}}[[PC0_LO]]:[[PC0_HI]]{{\]}}
 
-; GCN: [[BB1]]:
-; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
-; GCN: buffer_store_dword [[BB2_K]]
-
-; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
-; GCN: buffer_store_dword [[BB4_K]]
-; GCN: s_endpgm
-
-; GCN: [[BB4]]: ; %bb3
+; GCN: [[BB2]]: ; %bb3
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: v_nop_e64
 ; GCN: ;;#ASMEND
 
-; GCN: .Lfunc_end{{[0-9]+}}:
+; GCN: [[BB3]]:
+; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
+; GCN: buffer_store_dword [[BB2_K]]
+
+; GCN: v_mov_b32_e32 [[BB4_K:v[0-9]+]], 63
+; GCN: buffer_store_dword [[BB4_K]]
+; GCN: s_endpgm
+; GCN-NEXT: .Lfunc_end{{[0-9]+}}:
 
 define amdgpu_kernel void @uniform_unconditional_min_long_forward_branch(i32 addrspace(1)* %arg, i32 %arg1) {
 bb0:
   %tmp = icmp ne i32 %arg1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index e4a1177..308bc49 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -1502,7 +1502,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_lshr_b32 s5, s4, 16
 ; SI-NEXT: s_cmp_lg_u32 s5, 0
-; SI-NEXT: s_cbranch_scc0 .LBB14_4
+; SI-NEXT: s_cbranch_scc0 .LBB14_2
 ; SI-NEXT: ; %bb.1: ; %else
 ; SI-NEXT: s_mov_b32 s11, 0xf000
 ; SI-NEXT: s_mov_b32 s10, -1
@@ -1510,22 +1510,22 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; SI-NEXT: s_mov_b32 s9, s3
 ; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
 ; SI-NEXT: s_mov_b64 s[2:3], 0
-; SI-NEXT: s_cbranch_execnz .LBB14_3
-; SI-NEXT: .LBB14_2: ; %if
+; SI-NEXT: s_cbranch_execz .LBB14_3
+; SI-NEXT: s_branch .LBB14_4
+; SI-NEXT: .LBB14_2:
+; SI-NEXT: s_mov_b64 s[2:3], -1
+; SI-NEXT: v_mov_b32_e32 v0, 0
+; SI-NEXT: .LBB14_3: ; %if
 ; SI-NEXT: s_and_b32 s2, s4, 0xffff
 ; SI-NEXT: s_bcnt1_i32_b32 s2, s2
 ; SI-NEXT: s_waitcnt vmcnt(0)
 ; SI-NEXT: v_mov_b32_e32 v0, s2
-; SI-NEXT: .LBB14_3: ; %endif
+; SI-NEXT: .LBB14_4: ; %endif
 ; SI-NEXT: s_mov_b32 s3, 0xf000
 ; SI-NEXT: s_mov_b32 s2, -1
 ; SI-NEXT: s_waitcnt vmcnt(0)
 ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB14_4:
-; SI-NEXT: s_mov_b64 s[2:3], -1
-; SI-NEXT: v_mov_b32_e32 v0, 0
-; SI-NEXT: s_branch .LBB14_2
 ;
 ; VI-LABEL: ctpop_i16_in_br:
 ; VI: ; %bb.0: ; %entry
@@ -1535,7 +1535,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; VI-NEXT: s_lshr_b32 s5, s4, 16
 ; VI-NEXT: v_cmp_ne_u16_e64 s[6:7], s5, 0
 ; VI-NEXT: s_and_b64 vcc, exec, s[6:7]
-; VI-NEXT: s_cbranch_vccz .LBB14_4
+; VI-NEXT: s_cbranch_vccz .LBB14_2
 ; VI-NEXT: ; %bb.1: ; %else
 ; VI-NEXT: s_mov_b32 s11, 0xf000
 ; VI-NEXT: s_mov_b32 s10, -1
@@ -1543,22 +1543,22 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
 ; VI-NEXT: s_mov_b32 s9, s3
 ; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
 ; VI-NEXT: s_mov_b64 s[2:3], 0
-; VI-NEXT: s_cbranch_execnz .LBB14_3
-; VI-NEXT: .LBB14_2: ; %if
+; VI-NEXT: s_cbranch_execz .LBB14_3
+; VI-NEXT: s_branch .LBB14_4
+; VI-NEXT: .LBB14_2:
+; VI-NEXT: s_mov_b64 s[2:3], -1
+; VI-NEXT: ; implicit-def: $vgpr0
+; VI-NEXT: .LBB14_3: ; %if
 ; VI-NEXT: s_and_b32 s2, s4, 0xffff
 ; VI-NEXT: s_bcnt1_i32_b32 s2, s2
 ; VI-NEXT: s_waitcnt vmcnt(0)
 ; VI-NEXT: v_mov_b32_e32 v0, s2
-; VI-NEXT: .LBB14_3: ; %endif
+; VI-NEXT: .LBB14_4: ; %endif
 ; VI-NEXT: s_mov_b32 s3, 0xf000
 ; VI-NEXT: s_mov_b32 s2, -1
 ; VI-NEXT: s_waitcnt vmcnt(0)
 ; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
-; VI-NEXT: .LBB14_4:
-; VI-NEXT: s_mov_b64 s[2:3], -1
-; VI-NEXT: ; implicit-def: $vgpr0
-; VI-NEXT: s_branch .LBB14_2
 ;
 ; EG-LABEL: ctpop_i16_in_br:
 ; EG: ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 4de859f..89d319d 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -1534,17 +1534,19 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_cmp_lg_u32 s6, 0
-; SI-NEXT: s_cbranch_scc0 .LBB30_4
+; SI-NEXT: s_cbranch_scc0 .LBB30_2
 ; SI-NEXT: ; %bb.1: ; %else
 ; SI-NEXT: s_load_dword s7, s[2:3], 0x1
 ; SI-NEXT: s_mov_b64 s[4:5], 0
 ; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_mov_b64 vcc, vcc
-; SI-NEXT: s_cbranch_vccnz .LBB30_3
-; SI-NEXT: .LBB30_2: ; %if
+; SI-NEXT: s_cbranch_vccz .LBB30_3
+; SI-NEXT: s_branch .LBB30_4
+; SI-NEXT: .LBB30_2:
+; SI-NEXT: .LBB30_3: ; %if
 ; SI-NEXT: s_load_dword s7, s[2:3], 0x0
-; SI-NEXT: .LBB30_3: ; %endif
+; SI-NEXT: .LBB30_4: ; %endif
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: v_mov_b32_e32 v0, s6
 ; SI-NEXT: s_mov_b32 s3, 0x100f000
@@ -1552,8 +1554,6 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; SI-NEXT: v_mov_b32_e32 v1, s7
 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB30_4:
-; SI-NEXT: s_branch .LBB30_2
 ;
 ; VI-LABEL: insert_split_bb:
 ; VI: ; %bb.0: ; %entry
@@ -1561,14 +1561,16 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: s_cmp_lg_u32 s6, 0
-; VI-NEXT: s_cbranch_scc0 .LBB30_4
+; VI-NEXT: s_cbranch_scc0 .LBB30_2
 ; VI-NEXT: ; %bb.1: ; %else
 ; VI-NEXT: s_load_dword s7, s[2:3], 0x4
-; VI-NEXT: s_cbranch_execnz .LBB30_3
-; VI-NEXT: .LBB30_2: ; %if
+; VI-NEXT: s_cbranch_execz .LBB30_3
+; VI-NEXT: s_branch .LBB30_4
+; VI-NEXT: .LBB30_2:
+; VI-NEXT: .LBB30_3: ; %if
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: s_load_dword s7, s[2:3], 0x0
-; VI-NEXT: .LBB30_3: ; %endif
+; VI-NEXT: .LBB30_4: ; %endif
 ; VI-NEXT: s_waitcnt lgkmcnt(0)
 ; VI-NEXT: v_mov_b32_e32 v0, s6
 ; VI-NEXT: s_mov_b32 s3, 0x1100f000
@@ -1576,8 +1578,6 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
 ; VI-NEXT: v_mov_b32_e32 v1, s7
 ; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; VI-NEXT: s_endpgm
-; VI-NEXT: .LBB30_4:
-; VI-NEXT: s_branch .LBB30_2
 entry:
   %0 = insertelement <2 x i32> undef, i32 %a, i32 0
   %1 = icmp eq i32 %a, 0
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 8cccb5e..ea6493c 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -17,10 +17,11 @@ define amdgpu_kernel void @break_loop(i32 %arg) #0 {
 ; OPT-NEXT: br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
 ; OPT: bb4:
 ; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
+; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
+; OPT-NEXT: [[TMP0:%.*]] = xor i1 [[CMP1]], true
 ; OPT-NEXT: br label [[FLOW]]
 ; OPT: Flow:
-; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
+; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
 ; OPT-NEXT: [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
 ; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
 ; OPT-NEXT: br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 7fa17ec..ff22712 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -9,14 +9,14 @@
 ; StructurizeCFG.
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
 ; IR: %1 = extractvalue { i1, i64 } %0, 0
 ; IR: %2 = extractvalue { i1, i64 } %0, 1
 ; IR: br i1 %1, label %LeafBlock1, label %Flow
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 ; IR: %6 = extractvalue { i1, i64 } %5, 0
 ; IR: %7 = extractvalue { i1, i64 } %5, 1
@@ -42,7 +42,7 @@
 
 ; IR: Flow1:
 ; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf.inv, %LeafBlock ], [ %4, %Flow ]
 ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7)
 ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13)
 ; IR: %15 = extractvalue { i1, i64 } %14, 0
@@ -75,13 +75,16 @@
 ; GCN-NEXT: s_or_saveexec_b64
 ; GCN-NEXT: s_xor_b64
 
+; FIXME: Why is this compare essentially repeated?
 ; GCN: ; %LeafBlock
-; GCN-DAG: v_cmp_ne_u32_e32 vcc, 1,
+; GCN-DAG: v_cmp_eq_u32_e32 vcc, 1,
+; GCN-DAG: v_cmp_ne_u32_e64 [[TMP1:s\[[0-9]+:[0-9]+\]]], 1,
 ; GCN-DAG: s_andn2_b64 [[EXIT0]], [[EXIT0]], exec
 ; GCN-DAG: s_andn2_b64 [[EXIT1]], [[EXIT1]], exec
 ; GCN-DAG: s_and_b64 [[TMP0:s\[[0-9]+:[0-9]+\]]], vcc, exec
+; GCN-DAG: s_and_b64 [[TMP1]], [[TMP1]], exec
 ; GCN-DAG: s_or_b64 [[EXIT0]], [[EXIT0]], [[TMP0]]
-; GCN-DAG: s_or_b64 [[EXIT1]], [[EXIT1]], [[TMP0]]
+; GCN-DAG: s_or_b64 [[EXIT1]], [[EXIT1]], [[TMP1]]
 
 ; GCN: ; %Flow4
 ; GCN-NEXT: s_or_b64 exec, exec,
@@ -138,7 +141,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
 
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
@@ -193,22 +196,24 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_exit_region_divergent_ret_uniform_ret(
-; IR: %divergent.cond0 = icmp sge i32 %tmp16, 2
+; IR: %divergent.cond0 = icmp slt i32 %tmp16, 2
 ; IR: llvm.amdgcn.if
 ; IR: br i1
 
 ; IR: {{^}}Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %uniform.cond0, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %uniform.cond0.inv, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 ; IR: br i1 %6, label %LeafBlock, label %Flow1
 
 ; IR: {{^}}LeafBlock:
-; IR: %divergent.cond1 = icmp ne i32 %tmp16, 1
+; IR: %divergent.cond1 = icmp eq i32 %tmp16, 1
+; IR: %divergent.cond1.inv = xor i1 %divergent.cond1, true
 ; IR: br label %Flow1
 
 ; IR: LeafBlock1:
-; IR: %uniform.cond0 = icmp ne i32 %arg3, 2
+; IR: %uniform.cond0 = icmp eq i32 %arg3, 2
+; IR: %uniform.cond0.inv = xor i1 %uniform.cond0, true
 ; IR: br label %Flow
 
 ; IR: Flow2:
@@ -223,7 +228,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 
 ; IR: {{^}}Flow1:
 ; IR: %12 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
+; IR: %13 = phi i1 [ %divergent.cond1.inv, %LeafBlock ], [ %4, %Flow ]
 ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7)
 ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13)
 ; IR: %15 = extractvalue { i1, i64 } %14, 0
@@ -274,12 +279,12 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
 ; IR: br i1 %1, label %LeafBlock1, label %Flow
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
 ; IR: %8 = phi i1 [ false, %exit1 ], [ %12, %Flow1 ]
@@ -396,11 +401,11 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot)
+; IR: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %Pivot.inv)
 
 ; IR: Flow:
 ; IR: %3 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
-; IR: %4 = phi i1 [ %SwitchLeaf2, %LeafBlock1 ], [ false, %entry ]
+; IR: %4 = phi i1 [ %SwitchLeaf2.inv, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %2)
 
 ; IR: Flow2:
@@ -415,7 +420,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 
 ; IR: Flow1:
 ; IR: %12 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %3, %Flow ]
-; IR: %13 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
+; IR: %13 = phi i1 [ %SwitchLeaf.inv, %LeafBlock ], [ %4, %Flow ]
 ; IR: call void @llvm.amdgcn.end.cf.i64(i64 %7)
 ; IR: %14 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %13)
 ; IR: %15 = extractvalue { i1, i64 } %14, 0
@@ -635,7 +640,7 @@ uniform.ret:
 ; IR: br i1 %6, label %uniform.if, label %Flow2
 
 ; IR: Flow: ; preds = %uniform.then, %uniform.if
-; IR: %7 = phi i1 [ %uniform.cond2, %uniform.then ], [ %uniform.cond1, %uniform.if ]
+; IR: %7 = phi i1 [ %uniform.cond2.inv, %uniform.then ], [ %uniform.cond1.inv, %uniform.if ]
 ; IR: br i1 %7, label %uniform.endif, label %uniform.ret0
 
 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index a16f55e..ee90406 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -25,8 +25,9 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
 ; OPT: Flow:
 ; OPT-NEXT: [[TMP4]] = phi i32 [ [[TMP47]], [[ENDIF]] ], [ [[TMP0]], [[LOOP]] ]
 ; OPT-NEXT: [[TMP5:%.*]] = phi i1 [ [[TMP51:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
+; OPT-NEXT: [[TMP6:%.*]] = phi i1 [ [[TMP51_INV:%.*]], [[ENDIF]] ], [ true, [[LOOP]] ]
 ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP3]])
-; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN]])
+; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP6]], i64 [[PHI_BROKEN]])
 ; OPT-NEXT: [[TMP8:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP7]])
 ; OPT-NEXT: [[TMP9]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP5]], i64 [[PHI_BROKEN2]])
 ; OPT-NEXT: br i1 [[TMP8]], label [[FLOW1]], label [[LOOP]]
@@ -38,7 +39,8 @@ define amdgpu_vs void @multi_else_break(<4 x float> %vec, i32 %ub, i32 %cont) {
 ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP9]])
 ; OPT-NEXT: ret void
 ; OPT: ENDIF:
-; OPT-NEXT: [[TMP51]] = icmp ne i32 [[TMP47]], [[CONT:%.*]]
+; OPT-NEXT: [[TMP51]] = icmp eq i32 [[TMP47]], [[CONT:%.*]]
+; OPT-NEXT: [[TMP51_INV]] = xor i1 [[TMP51]], true
 ; OPT-NEXT: br label [[FLOW]]
 ;
 ; GCN-LABEL: multi_else_break:
@@ -121,13 +123,14 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT: [[LOAD0:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
 ; OPT-NEXT: br label [[NODEBLOCK:%.*]]
 ; OPT: NodeBlock:
-; OPT-NEXT: [[PIVOT:%.*]] = icmp sge i32 [[LOAD0]], 1
-; OPT-NEXT: br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
+; OPT-NEXT: [[PIVOT:%.*]] = icmp slt i32 [[LOAD0]], 1
+; OPT-NEXT: [[PIVOT_INV:%.*]] = xor i1 [[PIVOT]], true
+; OPT-NEXT: br i1 [[PIVOT_INV]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
 ; OPT: LeafBlock1:
 ; OPT-NEXT: [[SWITCHLEAF2:%.*]] = icmp eq i32 [[LOAD0]], 1
 ; OPT-NEXT: br i1 [[SWITCHLEAF2]], label [[CASE1:%.*]], label [[FLOW3:%.*]]
 ; OPT: Flow3:
-; OPT-NEXT: [[TMP0:%.*]] = phi i1 [ [[CMP2:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
+; OPT-NEXT: [[TMP0:%.*]] = phi i1 [ [[CMP2_INV:%.*]], [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
 ; OPT-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[CASE1]] ], [ true, [[LEAFBLOCK1]] ]
 ; OPT-NEXT: br label [[FLOW]]
 ; OPT: LeafBlock:
@@ -141,7 +144,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT: br i1 [[TMP5]], label [[FLOW6:%.*]], label [[BB1]]
 ; OPT: case0:
 ; OPT-NEXT: [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT: [[CMP1:%.*]] = icmp sge i32 [[TMP]], [[LOAD1]]
+; OPT-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP]], [[LOAD1]]
+; OPT-NEXT: [[CMP1_INV:%.*]] = xor i1 [[CMP1]], true
 ; OPT-NEXT: br label [[FLOW5]]
 ; OPT: Flow:
 ; OPT-NEXT: [[TMP6]] = phi i1 [ [[TMP0]], [[FLOW3]] ], [ true, [[NODEBLOCK]] ]
@@ -150,10 +154,11 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; OPT-NEXT: br i1 [[TMP8]], label [[LEAFBLOCK:%.*]], label [[FLOW4]]
 ; OPT: case1:
 ; OPT-NEXT: [[LOAD2:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
-; OPT-NEXT: [[CMP2:%.*]] = icmp sge i32 [[TMP]], [[LOAD2]]
+; OPT-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP]], [[LOAD2]]
+; OPT-NEXT: [[CMP2_INV]] = xor i1 [[CMP2]], true
 ; OPT-NEXT: br label [[FLOW3]]
 ; OPT: Flow5:
-; OPT-NEXT: [[TMP9]] = phi i1 [ [[CMP1]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
+; OPT-NEXT: [[TMP9]] = phi i1 [ [[CMP1_INV]], [[CASE0]] ], [ [[TMP6]], [[LEAFBLOCK]] ]
 ; OPT-NEXT: [[TMP10]] = phi i1 [ false, [[CASE0]] ], [ true, [[LEAFBLOCK]] ]
 ; OPT-NEXT: br label [[FLOW4]]
 ; OPT: Flow6:
@@ -191,8 +196,8 @@ define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 ; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 glc
 ; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: s_mov_b64 s[6:7], -1
 ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 1, v1
+; GCN-NEXT: s_mov_b64 s[6:7], -1
 ; GCN-NEXT: s_and_b64 vcc, exec, vcc
 ; GCN-NEXT: ; implicit-def: $sgpr8_sgpr9
 ; GCN-NEXT: s_mov_b64 s[10:11], -1
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index 86d13f3..a1fa2ab 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -236,8 +236,8 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; IR: Flow1:
 ; IR-NEXT: [[TMP11]] = phi <4 x i32> [ [[MY_TMP9:%.*]], [[BB21:%.*]] ], [ undef, [[BB14]] ]
 ; IR-NEXT: [[TMP12]] = phi i32 [ [[MY_TMP10:%.*]], [[BB21]] ], [ undef, [[BB14]] ]
-; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[MY_TMP12:%.*]], [[BB21]] ], [ true, [[BB14]] ]
-; IR-NEXT: [[TMP14]] = phi i1 [ [[MY_TMP12]], [[BB21]] ], [ false, [[BB14]] ]
+; IR-NEXT: [[TMP13:%.*]] = phi i1 [ [[TMP18:%.*]], [[BB21]] ], [ true, [[BB14]] ]
+; IR-NEXT: [[TMP14]] = phi i1 [ [[TMP18]], [[BB21]] ], [ false, [[BB14]] ]
 ; IR-NEXT: [[TMP15:%.*]] = phi i1 [ false, [[BB21]] ], [ true, [[BB14]] ]
 ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP10]])
 ; IR-NEXT: [[TMP16]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP13]], i64 [[PHI_BROKEN]])
@@ -262,7 +262,8 @@ define amdgpu_kernel void @nested_loop_conditions(i64 addrspace(1)* nocapture %a
 ; IR-NEXT: [[MY_TMP9]] = load <4 x i32>, <4 x i32> addrspace(1)* [[MY_TMP8]], align 16
 ; IR-NEXT: [[MY_TMP10]] = extractelement <4 x i32> [[MY_TMP9]], i64 0
 ; IR-NEXT: [[MY_TMP11:%.*]] = load volatile i32, i32 addrspace(1)* undef
-; IR-NEXT: [[MY_TMP12]] = icmp slt i32 [[MY_TMP11]], 9
+; IR-NEXT: [[MY_TMP12:%.*]] = icmp slt i32 [[MY_TMP11]], 9
+; IR-NEXT: [[TMP18]] = xor i1 [[MY_TMP12]], true
 ; IR-NEXT: br label [[FLOW1]]
 ; IR: Flow2:
 ; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP16]])
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
index b4eb682..f12bed6 100644
--- a/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
@@ -36,17 +36,19 @@ bb4:
 
 ; GCN-LABEL: {{^}}negated_cond_dominated_blocks:
 ; GCN: s_cmp_lg_u32
-; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0
+; GCN: s_cselect_b64 [[CC1:[^,]+]], -1, 0
 ; GCN: s_branch [[BB1:.LBB[0-9]+_[0-9]+]]
 ; GCN: [[BB0:.LBB[0-9]+_[0-9]+]]
 ; GCN-NOT: v_cndmask_b32
 ; GCN-NOT: v_cmp
 ; GCN: [[BB1]]:
+; GCN: s_mov_b64 [[CC2:[^,]+]], -1
 ; GCN: s_mov_b64 vcc, [[CC1]]
 ; GCN: s_cbranch_vccz [[BB2:.LBB[0-9]+_[0-9]+]]
-; GCN: s_mov_b64 vcc, exec
-; GCN: s_cbranch_execnz [[BB0]]
+; GCN: s_mov_b64 [[CC2]], 0
 ; GCN: [[BB2]]:
+; GCN: s_andn2_b64 vcc, exec, [[CC2]]
+; GCN: s_cbranch_vccnz [[BB0]]
 define amdgpu_kernel void @negated_cond_dominated_blocks(i32 addrspace(1)* %arg1) {
 bb:
   br label %bb2
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index 97e1a16..b06f3b8 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -54,7 +54,7 @@ ret.bb: ; preds = %else, %main_body
 }
 
 ; GCN-LABEL: {{^}}uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable:
-; GCN: s_cbranch_scc0
+; GCN: s_cbranch_vccz
 
 ; GCN: ; %bb.{{[0-9]+}}: ; %Flow
 ; GCN: s_cbranch_execnz [[RETURN:.LBB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index 15384e6..6a2b7ca 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -16,22 +16,22 @@ define amdgpu_kernel void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a,
 ; SI-NEXT: s_load_dword s0, s[0:1], 0xf
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_cmp_lg_u32 s8, 0
-; SI-NEXT: s_cbranch_scc0 .LBB0_4
+; SI-NEXT: s_cbranch_scc0 .LBB0_2
 ; SI-NEXT: ; %bb.1: ; %else
 ; SI-NEXT: s_add_i32 s2, s11, s0
-; SI-NEXT: s_cbranch_execnz .LBB0_3
-; SI-NEXT: .LBB0_2: ; %if
+; SI-NEXT: s_cbranch_execz .LBB0_3
+; SI-NEXT: s_branch .LBB0_4
+; SI-NEXT: .LBB0_2:
+; SI-NEXT: ; implicit-def: $sgpr2
+; SI-NEXT: .LBB0_3: ; %if
 ; SI-NEXT: s_sub_i32 s2, s9, s10
-; SI-NEXT: .LBB0_3: ; %endif
+; SI-NEXT: .LBB0_4: ; %endif
 ; SI-NEXT: s_add_i32 s0, s2, s8
 ; SI-NEXT: s_mov_b32 s7, 0xf000
 ; SI-NEXT: s_mov_b32 s6, -1
 ; SI-NEXT: v_mov_b32_e32 v0, s0
 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB0_4:
-; SI-NEXT: ; implicit-def: $sgpr2
-; SI-NEXT: s_branch .LBB0_2
 entry:
   %0 = icmp eq i32 %a, 0
@@ -59,28 +59,28 @@ define amdgpu_kernel void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, [8 x
 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_cmp_lg_u32 s6, 0
-; SI-NEXT: s_cbranch_scc0 .LBB1_4
+; SI-NEXT: s_cbranch_scc0 .LBB1_2
 ; SI-NEXT: ; %bb.1: ; %else
 ; SI-NEXT: s_load_dword s2, s[0:1], 0x2e
 ; SI-NEXT: s_load_dword s3, s[0:1], 0x37
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_add_i32 s7, s2, s3
-; SI-NEXT: s_cbranch_execnz .LBB1_3
-; SI-NEXT: .LBB1_2: ; %if
+; SI-NEXT: s_cbranch_execz .LBB1_3
+; SI-NEXT: s_branch .LBB1_4
+; SI-NEXT: .LBB1_2:
+; SI-NEXT: ; implicit-def: $sgpr7
+; SI-NEXT: .LBB1_3: ; %if
 ; SI-NEXT: s_load_dword s2, s[0:1], 0x1c
 ; SI-NEXT: s_load_dword s0, s[0:1], 0x25
 ; SI-NEXT: s_waitcnt lgkmcnt(0)
 ; SI-NEXT: s_add_i32 s7, s2, s0
-; SI-NEXT: .LBB1_3: ; %endif
+; SI-NEXT: .LBB1_4: ; %endif
 ; SI-NEXT: s_add_i32 s0, s7, s6
 ; SI-NEXT: s_mov_b32 s7, 0xf000
 ; SI-NEXT: s_mov_b32 s6, -1
 ; SI-NEXT: v_mov_b32_e32 v0, s0
 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; SI-NEXT: s_endpgm
-; SI-NEXT: .LBB1_4:
-; SI-NEXT: ; implicit-def: $sgpr7
-; SI-NEXT: s_branch .LBB1_2
 entry:
   %cmp0 = icmp eq i32 %a, 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index ef2acafa..874326d 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -213,7 +213,7 @@ ENDIF: ; preds = %LOOP
 ; CHECK-LABEL: {{^}}sample_v3:
 ; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
 ; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
-; CHECK: s_cbranch
+; CHECK: s_branch
 
 ; CHECK: BB{{[0-9]+_[0-9]+}}:
 ; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
@@ -315,15 +315,13 @@ ENDIF69: ; preds = %LOOP68
 ; CHECK-LABEL:{{^}}sample_rsrc
 
 ; CHECK: s_cmp_eq_u32
-; CHECK: s_cbranch_scc1 [[END:.LBB[0-9]+_[0-9]+]]
+; CHECK: s_cbranch_scc0 [[END:.LBB[0-9]+_[0-9]+]]
 
-; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}
-; CHECK: s_endpgm
+; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
 
 ; [[END]]:
-; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
 ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
-; CHECK: s_branch
+; CHECK: s_endpgm
 define amdgpu_ps void @sample_rsrc([6 x <4 x i32>] addrspace(4)* inreg %arg, [17 x <4 x i32>] addrspace(4)* inreg %arg1, [16 x <4 x i32>] addrspace(4)* inreg %arg2, [32 x <8 x i32>] addrspace(4)* inreg %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
 bb:
   %tmp = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(4)* %arg1, i32 0, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 18492cf..84ee7c5 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -172,8 +172,8 @@ endif:
 ; GFX1064: s_andn2_b64 exec, exec, s[{{[0-9:]+}}]
 ; GCN: s_cbranch_execz
 ; GCN: .LBB{{.*}}:
-; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, s{{[0-9]+}}
-; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], s[{{[0-9:]+}}]
+; GFX1032: s_and_saveexec_b32 s{{[0-9]+}}, vcc_lo
+; GFX1064: s_and_saveexec_b64 s[{{[0-9:]+}}], vcc{{$}}
 ; GCN: s_cbranch_execz
 ; GCN: ; %bb.{{[0-9]+}}:
 ; GCN: .LBB{{.*}}:
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
index 1a2ae70..42462b7 100644
--- a/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
+++ b/llvm/test/Transforms/StructurizeCFG/AMDGPU/loop-subregion-misordered.ll
@@ -22,25 +22,26 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-LABEL: @loop_subregion_misordered(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP:%.*]] = load volatile <2 x i32>, <2 x i32> addrspace(1)* undef, align 16
-; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef, align 8
+; CHECK-NEXT: [[LOAD1:%.*]] = load volatile <2 x float>, <2 x float> addrspace(1)* undef
 ; CHECK-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, i32 addrspace(1)* [[ARG0:%.*]], i32 [[TID]]
 ; CHECK-NEXT: [[I_INITIAL:%.*]] = load volatile i32, i32 addrspace(1)* [[GEP]], align 4
 ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
 ; CHECK: LOOP.HEADER:
-; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[FLOW3:%.*]] ]
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INITIAL]], [[ENTRY:%.*]] ], [ [[TMP4:%.*]], [[FLOW3:%.*]] ]
 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x100b
 ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(1)* null, i64 [[TMP12]]
 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32> addrspace(1)* [[TMP13]], align 16
 ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
 ; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 65535
-; CHECK-NEXT: [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 1
-; CHECK-NEXT: br i1 [[TMP17]], label [[BB62:%.*]], label [[FLOW:%.*]]
+; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 1
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP17]], true
+; CHECK-NEXT: br i1 [[TMP0]], label [[BB62:%.*]], label [[FLOW:%.*]]
 ; CHECK: Flow1:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[INC_I:%.*]], [[INCREMENT_I:%.*]] ], [ undef, [[BB62]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = phi i1 [ false, [[INCREMENT_I]] ], [ true, [[BB62]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = phi i1 [ true, [[INCREMENT_I]] ], [ false, [[BB62]] ]
 ; CHECK-NEXT: br label [[FLOW]]
 ; CHECK: bb18:
 ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP]], i64 0
@@ -49,9 +50,9 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-NEXT: [[TMP25:%.*]] = mul nuw nsw i32 [[TMP24]], 52
 ; CHECK-NEXT: br label [[INNER_LOOP:%.*]]
 ; CHECK: Flow2:
-; CHECK-NEXT: [[TMP3]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP6:%.*]], [[FLOW]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP8:%.*]], [[FLOW]] ]
-; CHECK-NEXT: br i1 [[TMP4]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
+; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP59:%.*]], [[INNER_LOOP_BREAK:%.*]] ], [ [[TMP8:%.*]], [[FLOW]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ true, [[INNER_LOOP_BREAK]] ], [ [[TMP10:%.*]], [[FLOW]] ]
+; CHECK-NEXT: br i1 [[TMP5]], label [[END_ELSE_BLOCK:%.*]], label [[FLOW3]]
 ; CHECK: INNER_LOOP:
 ; CHECK-NEXT: [[INNER_LOOP_J:%.*]] = phi i32 [ [[INNER_LOOP_J_INC:%.*]], [[INNER_LOOP]] ], [ [[TMP25]], [[BB18:%.*]] ]
 ; CHECK-NEXT: call void asm sideeffect "
@@ -60,32 +61,33 @@ define amdgpu_kernel void @loop_subregion_misordered(i32 addrspace(1)* %arg0) #0
 ; CHECK-NEXT: br i1 [[INNER_LOOP_CMP]], label [[INNER_LOOP_BREAK]], label [[INNER_LOOP]]
 ; CHECK: INNER_LOOP_BREAK:
 ; CHECK-NEXT: [[TMP59]] = extractelement <4 x i32> [[TMP14]], i64 2
-; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #[[ATTR0:[0-9]+]]
+; CHECK-NEXT: call void asm sideeffect "s_nop 23 ", "~{memory}"() #0
 ; CHECK-NEXT: br label [[FLOW2:%.*]]
 ; CHECK: bb62:
-; CHECK-NEXT: [[LOAD13:%.*]] = icmp uge i32 [[TMP16]], 271
-; CHECK-NEXT: br i1 [[LOAD13]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
+; CHECK-NEXT: [[LOAD13:%.*]] = icmp ult i32 [[TMP16]], 271
+; CHECK-NEXT: [[TMP6:%.*]] = xor i1 [[LOAD13]], true
+; CHECK-NEXT: br i1 [[TMP6]], label [[INCREMENT_I]], label [[FLOW1:%.*]]
 ; CHECK: Flow3:
-; CHECK-NEXT: [[TMP5:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
-; CHECK-NEXT: br i1 [[TMP5]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
+; CHECK-NEXT: [[TMP7:%.*]] = phi i1 [ [[CMP_END_ELSE_BLOCK:%.*]], [[END_ELSE_BLOCK]] ], [ true, [[FLOW2]] ]
+; CHECK-NEXT: br i1 [[TMP7]], label [[FLOW4:%.*]], label [[LOOP_HEADER]]
 ; CHECK: Flow4:
-; CHECK-NEXT: br i1 [[TMP7:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
+; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[BB64:%.*]], label [[RETURN:%.*]]
 ; CHECK: bb64:
-; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #[[ATTR0]]
+; CHECK-NEXT: call void asm sideeffect "s_nop 42", "~{memory}"() #0
 ; CHECK-NEXT: br label [[RETURN]]
 ; CHECK: Flow:
-; CHECK-NEXT: [[TMP6]] = phi i32 [ [[TMP0]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
-; CHECK-NEXT: [[TMP7]] = phi i1 [ [[TMP1]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT: [[TMP8]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
-; CHECK-NEXT: [[TMP9:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
-; CHECK-NEXT: br i1 [[TMP9]], label [[BB18]], label [[FLOW2]]
+; CHECK-NEXT: [[TMP8]] = phi i32 [ [[TMP1]], [[FLOW1]] ], [ undef, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP9]] = phi i1 [ [[TMP2]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP10]] = phi i1 [ [[TMP3]], [[FLOW1]] ], [ false, [[LOOP_HEADER]] ]
+; CHECK-NEXT: [[TMP11:%.*]] = phi i1 [ false, [[FLOW1]] ], [ true, [[LOOP_HEADER]] ]
+; CHECK-NEXT: br i1 [[TMP11]], label [[BB18]], label [[FLOW2]]
 ; CHECK: INCREMENT_I:
 ; CHECK-NEXT: [[INC_I]] = add i32 [[I]], 1
 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1336
 ; CHECK-NEXT: br label [[FLOW1]]
 ; CHECK: END_ELSE_BLOCK:
 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x1337
-; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP3]], -1
+; CHECK-NEXT: [[CMP_END_ELSE_BLOCK]] = icmp eq i32 [[TMP4]], -1
 ; CHECK-NEXT: br label [[FLOW3]]
 ; CHECK: RETURN:
 ; CHECK-NEXT: call void asm sideeffect "s_nop 0x99
diff --git a/llvm/test/Transforms/StructurizeCFG/bug36015.ll b/llvm/test/Transforms/StructurizeCFG/bug36015.ll
index 73ef4b1..507b9ae 100644
--- a/llvm/test/Transforms/StructurizeCFG/bug36015.ll
+++ b/llvm/test/Transforms/StructurizeCFG/bug36015.ll
@@ -18,7 +18,7 @@ loop.inner:
   br i1 %cond.inner, label %if, label %else
 
 ; CHECK: if:
-; CHECK: %cond.if = icmp sge i32 %ctr.if, %count
+; CHECK: %cond.if.inv = xor i1 %cond.if, true
 ; CHECK: br label %Flow
 if:
   %ctr.if = add i32 %ctr.loop.inner, 1
@@ -27,7 +27,7 @@ if:
   br i1 %cond.if, label %loop.inner, label %exit
 
 ; CHECK: Flow:
-; CHECK: %1 = phi i1 [ %cond.if, %if ], [ true, %loop.inner ]
+; CHECK: %1 = phi i1 [ %cond.if.inv, %if ], [ true, %loop.inner ]
 ; CHECK: %2 = phi i1 [ false, %if ], [ true, %loop.inner ]
 ; CHECK: br i1 %1, label %Flow1, label %loop.inner
 
@@ -43,7 +43,7 @@ else:
   br i1 %cond.else, label %loop.outer, label %exit
 
 ; CHECK: Flow2:
-; CHECK: %4 = phi i1 [ %cond.else, %else ], [ true, %Flow1 ]
+; CHECK: %4 = phi i1 [ %cond.else.inv, %else ], [ true, %Flow1 ]
 ; CHECK: br i1 %4, label %exit, label %loop.outer
 
 exit:
diff --git a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
index 880cbfe..61482bb 100644
--- a/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
+++ b/llvm/test/Transforms/StructurizeCFG/invert-constantexpr.ll
@@ -6,7 +6,8 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
 ; CHECK-LABEL: @invert_constantexpr_condition(
 ; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = icmp ne i32 [[ARG:%.*]], 0
+; CHECK-NEXT: [[TMP:%.*]] = icmp eq i32 [[ARG:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TMP]], true
 ; CHECK-NEXT: br i1 icmp eq (i32 ptrtoint (i32* @g to i32), i32 0), label [[BB2:%.*]], label [[FLOW:%.*]]
 ; CHECK: bb2:
 ; CHECK-NEXT: br label [[FLOW]]
 
@@ -15,8 +16,8 @@ define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
 ; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], icmp eq (i32 ptrtoint (i32* @g to i32), i32 0)
 ; CHECK-NEXT: br label [[BB8:%.*]]
 ; CHECK: Flow:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i1 [ [[TMP]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ]
-; CHECK-NEXT: br i1 [[TMP0]], label [[BB6]], label [[BB3:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB2]] ], [ icmp ne (i32 ptrtoint (i32* @g to i32), i32 0), [[BB:%.*]] ]
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB6]], label [[BB3:%.*]]
 ; CHECK: bb6:
 ; CHECK-NEXT: [[TMP7]] = icmp slt i32 [[ARG]], [[ARG1:%.*]]
 ; CHECK-NEXT: br label [[BB3]]
diff --git a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
index a6219ec..d21742f 100644
--- a/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
+++ b/llvm/test/Transforms/StructurizeCFG/one-loop-multiple-backedges.ll
@@ -8,22 +8,22 @@ bb:
   br label %bb3
 
 ; CHECK: bb3:
-; CHECK: %tmp4 = fcmp oge float %arg1, 3.500000e+00
-; CHECK: br i1 %tmp4, label %bb5, label %Flow
+; CHECK: %tmp4.inv = xor i1 %tmp4, true
+; CHECK: br i1 %tmp4.inv, label %bb5, label %Flow
 bb3: ; preds = %bb7, %bb
   %tmp = phi i64 [ 0, %bb ], [ %tmp8, %bb7 ]
   %tmp4 = fcmp ult float %arg1, 3.500000e+00
   br i1 %tmp4, label %bb7, label %bb5
 
 ; CHECK: bb5:
-; CHECK: %tmp6 = fcmp uge float 0.000000e+00, %arg2
+; CHECK: %tmp6.inv = xor i1 %tmp6, true
 ; CHECK: br label %Flow
 bb5: ; preds = %bb3
   %tmp6 = fcmp olt float 0.000000e+00, %arg2
   br i1 %tmp6, label %bb10, label %bb7
 
 ; CHECK: Flow:
-; CHECK: %0 = phi i1 [ %tmp6, %bb5 ], [ %tmp4, %bb3 ]
+; CHECK: %0 = phi i1 [ %tmp6.inv, %bb5 ], [ %tmp4, %bb3 ]
 ; CHECK: br i1 %0, label %bb7, label %Flow1
 
 ; CHECK: bb7:
@@ -34,7 +34,7 @@ bb7: ; preds = %bb5, %bb3
   br i1 %tmp9, label %bb3, label %bb10
 
 ; CHECK: Flow1:
-; CHECK: %3 = phi i1 [ %tmp9, %bb7 ], [ true, %Flow ]
+; CHECK: %3 = phi i1 [ %tmp9.inv, %bb7 ], [ true, %Flow ]
 ; CHECK: br i1 %3, label %bb10, label %bb3
 
 ; CHECK: bb10:
diff --git a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
index 21831e6..291e9a5 100644
--- a/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
+++ b/llvm/test/Transforms/StructurizeCFG/post-order-traversal-bug.ll
@@ -59,7 +59,7 @@ for.end: ; preds = %for.body.1, %if.the
 ; CHECK: br i1 %{{[0-9]}}, label %for.body.1, label %Flow2
 
 ; CHECK: for.body.1:
-; CHECK: br i1 %cmp1.5, label %for.body.6, label %Flow3
+; CHECK: br i1 %cmp1.5.inv, label %for.body.6, label %Flow3
 for.body.1: ; preds = %if.then, %lor.lhs.false
   %best_val.233 = phi float [ %tmp5, %if.then ], [ %best_val.027, %lor.lhs.false ]
   %best_count.231 = phi i32 [ %sub4, %if.then ], [ %best_count.025, %lor.lhs.false ]