# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -start-before=block-placement -o - %s | FileCheck %s

# Test that loop headers are aligned to 32 bytes on GFX950 when the first
# instruction is 8 bytes, to prevent the instruction from being split by the
# 32-byte fetch window boundary.

# The second test case verifies that 4-byte instructions do NOT trigger
# alignment (CHECK-NEXT chain would break if .p2align were inserted).

# Positive case: bb.1 lists itself as a successor, so it is a loop header, and
# its first instruction is V_LSHRREV_B64_e64 (the 8-byte case described above).
# The checks expect ".p2align 5, , 4" (2^5 = 32-byte alignment, no fill value,
# at most 4 bytes of padding) immediately before the .LBB0_1 label.
---
name: loop_with_8byte_first_inst
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: loop_with_8byte_first_inst:
  ; CHECK: ; %bb.0:
  ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  ; CHECK-NEXT: s_mov_b64 s[0:1], 0
  ; CHECK-NEXT: .p2align 5, , 4
  ; CHECK-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
  ; CHECK-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1]

  ; Preheader: zero the 64-bit SGPR pair that the loop ORs compare results into.
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $vgpr0_vgpr1

    renamable $sgpr0_sgpr1 = S_MOV_B64 0

  ; Loop header and latch in one block; the back edge to bb.1 carries the
  ; larger branch weight (0x7c000000 vs. 0x04000000 for the exit).
  bb.1:
    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
    liveins: $sgpr0_sgpr1, $vgpr0_vgpr1

    ; First instruction of the loop header: this is the one the alignment
    ; directive is expected to protect.
    renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, killed renamable $vgpr0_vgpr1, implicit $exec
    V_CMP_EQ_U64_e32 0, $vgpr0_vgpr1, implicit-def $vcc, implicit $exec
    renamable $sgpr0_sgpr1 = S_OR_B64 killed renamable $vcc, killed renamable $sgpr0_sgpr1, implicit-def $scc
    $exec = S_ANDN2_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.1, implicit $exec

  ; Exit: OR the accumulated mask back into exec and return.
  bb.2:
    liveins: $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
    S_SETPC_B64_return undef $sgpr30_sgpr31
...
# Negative case: same loop shape as the 8-byte test, but the loop header bb.1
# starts with V_ADD_U32_e32 (the 4-byte case from the file header). The
# CHECK-NEXT chain goes straight from s_mov_b64 to the .LBB1_1 label, so any
# .p2align emitted between them makes the test fail.
---
name: loop_with_4byte_first_inst
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: loop_with_4byte_first_inst:
  ; CHECK: ; %bb.0:
  ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  ; CHECK-NEXT: s_mov_b64 s[0:1], 0
  ; CHECK-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1
  ; CHECK-NEXT: v_add_u32_e32 v0, 1, v0

  ; Preheader: zero the 64-bit SGPR pair that the loop ORs compare results into.
  bb.0:
    successors: %bb.1(0x80000000)
    liveins: $vgpr0

    renamable $sgpr0_sgpr1 = S_MOV_B64 0

  ; Loop header and latch in one block; the back edge to bb.1 carries the
  ; larger branch weight (0x7c000000 vs. 0x04000000 for the exit).
  bb.1:
    successors: %bb.2(0x04000000), %bb.1(0x7c000000)
    liveins: $sgpr0_sgpr1, $vgpr0

    ; First instruction of the loop header: no alignment directive is expected
    ; in front of it.
    renamable $vgpr0 = V_ADD_U32_e32 1, killed $vgpr0, implicit $exec
    V_CMP_LT_U32_e32 10, $vgpr0, implicit-def $vcc, implicit $exec
    renamable $sgpr0_sgpr1 = S_OR_B64 killed renamable $vcc, killed renamable $sgpr0_sgpr1, implicit-def $scc
    $exec = S_ANDN2_B64 $exec, renamable $sgpr0_sgpr1, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.1, implicit $exec

  ; Exit: OR the accumulated mask back into exec and return.
  bb.2:
    liveins: $sgpr0_sgpr1

    $exec = S_OR_B64 $exec, killed renamable $sgpr0_sgpr1, implicit-def $scc
    S_SETPC_B64_return undef $sgpr30_sgpr31
...