; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -stop-after=si-insert-waitcnts < %s | FileCheck %s

; Testcase reduced from Blender 4.1 where we generated incorrect waitcnts due to a bad
; WaitcntBrackets::merge implementation.
;
; Shape of the reproducer (as written in the IR body of @widget below):
;   * %bb2 heads an outer cycle. It is re-entered from %bb13 and from %bb11,
;     and its phi consumes a value loaded in %bb13 (%load14) or in %bb3
;     (%load4).
;   * %bb6 heads an inner cycle that is entered only from %bb3 and re-entered
;     from %bb11. Its phi consumes a value loaded in %bb3 (%load5) or in %bb9
;     (%load10, forwarded through %phi12).
; Loaded values are therefore first used in a different block from the one
; that issued the load, and each header has several predecessors. The
; autogenerated assertions pin the S_WAITCNT instructions emitted in front of
; those first uses (start of bb.1.bb2 and start of bb.4.bb6) as well as the
; one between the load and the compare inside bb.1.bb2.
;
; The assertion lines are generated output; regenerate them with the script
; named in the NOTE line rather than editing them by hand.

%struct.bar = type { %struct.bar.0 }
%struct.bar.0 = type { float, float, float, float }

define amdgpu_kernel void @widget(ptr addrspace(1) %arg, i1 %arg1) {
  ; CHECK-LABEL: name: widget
  ; CHECK: bb.0.bb:
  ; CHECK-NEXT: successors: %bb.1(0x80000000)
  ; CHECK-NEXT: liveins: $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr17
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $sgpr22_sgpr23 = S_MOV_B64 $sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: $sgpr20_sgpr21 = S_MOV_B64 killed $sgpr0_sgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3
  ; CHECK-NEXT: renamable $sgpr2 = S_LOAD_DWORD_IMM renamable $sgpr8_sgpr9, 8, 0 :: (dereferenceable invariant load (s32) from %ir.arg1.kernarg.offset.align.down, align 8, addrspace 4)
  ; CHECK-NEXT: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1, align 16, addrspace 4)
  ; CHECK-NEXT: $sgpr20 = S_ADD_U32 $sgpr20, killed $sgpr17, implicit-def $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: $sgpr21 = S_ADDC_U32 $sgpr21, 0, implicit-def dead $scc, implicit killed $scc, implicit-def $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: renamable $vgpr1 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT: S_WAITCNT 49279
  ; CHECK-NEXT: S_BITCMP1_B32 killed renamable $sgpr2, 0, implicit-def $scc
  ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec, implicit $exec
  ; CHECK-NEXT: renamable $sgpr2_sgpr3 = S_CSELECT_B64 -1, 0, implicit killed $scc
  ; CHECK-NEXT: $vgpr2_vgpr3 = V_PK_MOV_B32 8, 0, 8, 0, 0, 0, 0, 0, 0, implicit $exec
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF
  ; CHECK-NEXT: S_BRANCH %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.10.loop.exit.guard:
  ; CHECK-NEXT: successors: %bb.11(0x04000000), %bb.1(0x7c000000)
  ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
  ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = IMPLICIT_DEF
  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit killed $vcc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1.bb2:
  ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_WAITCNT 3952
  ; CHECK-NEXT: renamable $vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
  ; CHECK-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 4, $vgpr0_vgpr1, implicit $exec
  ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD killed renamable $vgpr4_vgpr5, 0, 0, implicit $exec :: (load (s32) from %ir.getelementptr, align 16, addrspace 1)
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_OR_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc
  ; CHECK-NEXT: S_WAITCNT 3952
  ; CHECK-NEXT: V_CMP_GT_I32_e32 1, killed $vgpr0, implicit-def $vcc, implicit $exec
  ; CHECK-NEXT: renamable $vgpr0 = IMPLICIT_DEF
  ; CHECK-NEXT: $sgpr8_sgpr9 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.12.bb13:
  ; CHECK-NEXT: successors: %bb.2(0x80000000)
  ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_ANDN2_B64 killed renamable $sgpr6_sgpr7, $exec, implicit-def dead $scc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2.Flow3:
  ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
  ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr8_sgpr9, implicit-def $scc
  ; CHECK-NEXT: renamable $sgpr8_sgpr9 = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def $scc
  ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr8_sgpr9, killed renamable $sgpr4_sgpr5, implicit-def $scc
  ; CHECK-NEXT: $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc
  ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.3.bb3:
  ; CHECK-NEXT: successors: %bb.4(0x80000000)
  ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
  ; CHECK-NEXT: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr20_sgpr21_sgpr22_sgpr23, 0, 0, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(5) null`, addrspace 5)
  ; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT: S_BRANCH %bb.4
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.9.Flow2:
  ; CHECK-NEXT: successors: %bb.10(0x04000000), %bb.4(0x7c000000)
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
  ; CHECK-NEXT: S_CBRANCH_VCCZ %bb.10, implicit killed $vcc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.4.bb6:
  ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000)
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_WAITCNT 3952
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = V_CMP_EQ_U32_e64 0, killed $vgpr4, implicit $exec
  ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr6_sgpr7, implicit-def dead $scc
  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.5, implicit killed $vcc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.6.bb9:
  ; CHECK-NEXT: successors: %bb.7(0x80000000)
  ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR renamable $vgpr1, renamable $vgpr1, renamable $sgpr0_sgpr1, 0, 0, implicit $exec :: (store (s32) into %ir.arg.load, addrspace 1)
  ; CHECK-NEXT: renamable $vgpr4 = GLOBAL_LOAD_DWORD renamable $vgpr2_vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(1) null`, addrspace 1)
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1
  ; CHECK-NEXT: S_BRANCH %bb.7
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT: successors: %bb.7(0x80000000)
  ; CHECK-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: renamable $vgpr4 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.7.Flow:
  ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000)
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr6_sgpr7, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1
  ; CHECK-NEXT: $vcc = S_ANDN2_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
  ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_MOV_B64 -1
  ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.9, implicit killed $vcc
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.8.bb11:
  ; CHECK-NEXT: successors: %bb.9(0x80000000)
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr4, $sgpr0_sgpr1, $sgpr2_sgpr3, $vgpr0_vgpr1:0x000000000000000C, $vgpr2_vgpr3, $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $sgpr2_sgpr3
  ; CHECK-NEXT: S_BRANCH %bb.9
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.11.DummyReturnBlock:
  ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: S_ENDPGM 0
bb:
  br label %bb2

; Outer cycle header. %phi is 0 on first entry, otherwise the value loaded in
; %bb13 (%load14) or in %bb3 (%load4, arriving via %bb11). The phi feeds the
; address computation for %load.
bb2: ; preds = %bb13, %bb11, %bb
  %phi = phi i32 [ 0, %bb ], [ %load14, %bb13 ], [ %load4, %bb11 ]
  %xor = xor i32 %phi, 1
  %zext = zext i32 %xor to i64
  %getelementptr = getelementptr %struct.bar, ptr addrspace(1) null, i64 %zext
  %load = load i32, ptr addrspace(1) %getelementptr, align 16
  %icmp = icmp sgt i32 %load, 0
  br i1 %icmp, label %bb3, label %bb13

; Sole entry into the inner cycle. Issues one addrspace(5) load and one
; addrspace(1) load; %load5 is consumed by %phi7 in %bb6, while %load4 is only
; consumed once control returns to %bb2.
bb3: ; preds = %bb2
  %load4 = load i32, ptr addrspace(5) null, align 4
  %load5 = load i32, ptr addrspace(1) null, align 4
  br label %bb6

; Inner cycle header. %phi7 is %load5 on entry from %bb3, otherwise %phi12
; carried around from %bb11.
bb6: ; preds = %bb11, %bb3
  %phi7 = phi i32 [ %load5, %bb3 ], [ %phi12, %bb11 ]
  %icmp8 = icmp eq i32 %phi7, 0
  br i1 %icmp8, label %bb11, label %bb9

; Taken when %phi7 is non-zero: stores 0 through %arg and issues another
; addrspace(1) load whose result reaches %phi12.
bb9: ; preds = %bb6
  store i32 0, ptr addrspace(1) %arg, align 4
  %load10 = load i32, ptr addrspace(1) null, align 4
  br label %bb11

; Shared latch: the kernel argument %arg1 selects between re-entering the
; outer header (%bb2) and the inner header (%bb6).
bb11: ; preds = %bb9, %bb6
  %phi12 = phi i32 [ 0, %bb6 ], [ %load10, %bb9 ]
  br i1 %arg1, label %bb2, label %bb6

; Taken when %load is not greater than 0: loads the next value for %phi and
; branches straight back to the outer header.
bb13: ; preds = %bb2
  %load14 = load i32, ptr addrspace(1) null, align 4
  br label %bb2
}