diff options
author | Christudasan Devadasan <christudasan.devadasan@amd.com> | 2024-11-12 23:30:57 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-12 23:30:57 +0530 |
commit | 2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df (patch) | |
tree | 43f98c075c1daf3c917a0be33594c57e548fb59a | |
parent | 1791b25f43f4e6a0b21284ce8076cfab160cb61a (diff) | |
download | llvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.zip llvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.tar.gz llvm-2b5b57c5cf78af66b5b9f514c4b51b4adc9a80df.tar.bz2 |
[AMDGPU] Skip non-wwm reg implicit-def from bb prolog (#115834)
Currently all implicit-def instructions are part of
bb prolog. We should only include the wwm-register's
implicit definitions into the BB prolog. The other
vector class registers' implicit defs when exist at
the bb top might cause interference when pushed the
LR_split copy insertion downwards. The SplitKit is
very strict on altering the insertion points and will
assert such instances.
10 files changed, 76 insertions, 66 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ad45af0..c864f03 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -8909,16 +8909,19 @@ bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI, // needed by the prolog. However, the insertions for scalar registers can // always be placed at the BB top as they are independent of the exec mask // value. + const MachineFunction *MF = MI.getParent()->getParent(); bool IsNullOrVectorRegister = true; if (Reg) { - const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); IsNullOrVectorRegister = !RI.isSGPRClass(RI.getRegClassForReg(MRI, Reg)); } uint16_t Opcode = MI.getOpcode(); + const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); return IsNullOrVectorRegister && (isSGPRSpill(Opcode) || isWWMRegSpillOpcode(Opcode) || - Opcode == AMDGPU::IMPLICIT_DEF || + (Opcode == AMDGPU::IMPLICIT_DEF && + MFI->isWWMReg(MI.getOperand(0).getReg())) || (!MI.isTerminator() && Opcode != AMDGPU::COPY && MI.modifiesRegister(AMDGPU::EXEC, &RI))); } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 018322e..2a75468 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -596,6 +596,10 @@ public: SMDiagnostic &Error, SMRange &SourceRange); void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); } + bool isWWMReg(Register Reg) const { + return Reg.isVirtual() ? checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG) + : WWMReservedRegs.contains(Reg); + } void updateNonWWMRegMask(BitVector &RegMask) { NonWWMRegMask = RegMask; } BitVector getNonWWMRegMask() const { return NonWWMRegMask; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index 4657311..5bbe3e4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -19,8 +19,8 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc @@ -122,8 +122,8 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc @@ -790,8 +790,8 @@ body: | ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll index 88fd7dc..c942610 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll @@ -170,8 +170,8 @@ define <4 x float> @waterfall_loop(<8 x i32> %vgpr_srd) { ; CHECK-NEXT: v_readlane_b32 s4, v16, 4 ; CHECK-NEXT: s_mov_b32 exec_lo, s4 ; CHECK-NEXT: ; %bb.4: -; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:76 ; 4-byte Folded Reload +; CHECK-NEXT: ; implicit-def: $sgpr4 ; CHECK-NEXT: v_mov_b32_e32 v1, s4 ; CHECK-NEXT: v_mov_b32_e32 v2, s4 ; CHECK-NEXT: v_mov_b32_e32 v3, s4 diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll index b3b26df..50c9c0c 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll @@ -1135,11 +1135,11 @@ define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { ; GCN-O0-NEXT: s_cbranch_execz .LBB5_5 ; GCN-O0-NEXT: ; %bb.3: ; %bb4 ; GCN-O0-NEXT: ; in Loop: Header=BB5_1 Depth=1 -; GCN-O0-NEXT: ; implicit-def: $sgpr4 ; GCN-O0-NEXT: s_or_saveexec_b64 s[14:15], -1 ; GCN-O0-NEXT: s_waitcnt expcnt(0) ; GCN-O0-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-O0-NEXT: s_mov_b64 exec, s[14:15] +; GCN-O0-NEXT: ; implicit-def: $sgpr4 ; GCN-O0-NEXT: v_mov_b32_e32 v0, s4 ; GCN-O0-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen ; GCN-O0-NEXT: s_mov_b32 s4, 0 diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll index 6fed988..e71c6cf 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -5370,9 +5370,9 @@ define amdgpu_kernel void @extract_vgpr_offset_multiple_in_block(ptr addrspace(1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v18, off, s[36:39], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[28:29] -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s4, v18, 25 +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s7, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -6223,8 +6223,8 @@ define amdgpu_kernel void @insert_vgpr_offset_multiple_in_block(ptr addrspace(1) ; NOOPT-NEXT: s_mov_b64 exec, s[0:1] ; NOOPT-NEXT: s_cbranch_execz .LBB17_8 ; NOOPT-NEXT: ; %bb.7: ; %bb1 -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: buffer_load_dword v0, off, s[28:31], 0 offset:68 ; 4-byte Folded Reload +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -7286,10 +7286,10 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] -; NOOPT-NEXT: ; implicit-def: $sgpr2 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v4, 0 ; NOOPT-NEXT: v_readlane_b32 s1, v4, 1 +; NOOPT-NEXT: ; implicit-def: $sgpr2 ; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 @@ -7316,11 +7316,11 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: ;;#ASMEND ; NOOPT-NEXT: s_branch .LBB19_4 ; NOOPT-NEXT: .LBB19_3: ; %bb4 -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_or_saveexec_b64 s[8:9], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v4, off, s[12:15], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -7345,8 +7345,8 @@ define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) { ; NOOPT-NEXT: s_mov_b64 exec, s[8:9] ; NOOPT-NEXT: s_branch .LBB19_1 ; NOOPT-NEXT: .LBB19_4: ; %bb7 -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr4 +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s7, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -7529,10 +7529,10 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] -; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v4, 0 ; NOOPT-NEXT: v_readlane_b32 s1, v4, 1 +; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 ; NOOPT-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] ; NOOPT-NEXT: s_mov_b32 s0, 1 ; NOOPT-NEXT: ; implicit-def: $sgpr1 @@ -7560,11 +7560,11 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: ;;#ASMEND ; NOOPT-NEXT: s_branch .LBB20_4 ; NOOPT-NEXT: .LBB20_3: ; %bb4 -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_or_saveexec_b64 s[12:13], -1 ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v4, off, s[16:19], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s6, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -7590,8 +7590,8 @@ define amdgpu_kernel void @insert_adjacent_blocks(i32 %arg, float %val0) { ; NOOPT-NEXT: s_mov_b64 exec, s[12:13] ; NOOPT-NEXT: s_branch .LBB20_1 ; NOOPT-NEXT: .LBB20_4: ; %bb7 -; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5_sgpr6_sgpr7 +; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: s_mov_b32 s10, s1 ; NOOPT-NEXT: ; implicit-def: $sgpr0_sgpr1 ; NOOPT-NEXT: ; kill: def $sgpr0 killed $sgpr0 killed $sgpr0_sgpr1 @@ -9105,9 +9105,9 @@ define amdgpu_kernel void @broken_phi_bb(i32 %arg, i32 %arg1) { ; NOOPT-NEXT: s_waitcnt expcnt(0) ; NOOPT-NEXT: buffer_load_dword v18, off, s[24:27], 0 ; 4-byte Folded Reload ; NOOPT-NEXT: s_mov_b64 exec, s[20:21] -; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3 ; NOOPT-NEXT: s_waitcnt vmcnt(0) ; NOOPT-NEXT: v_readlane_b32 s0, v18, 1 +; NOOPT-NEXT: ; implicit-def: $sgpr2_sgpr3 ; NOOPT-NEXT: ; kill: def $sgpr3 killed $sgpr3 killed $sgpr2_sgpr3 ; NOOPT-NEXT: ; implicit-def: $sgpr4_sgpr5 ; NOOPT-NEXT: ; kill: def $sgpr4 killed $sgpr4 killed $sgpr4_sgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir index 7421a2e..215200c 100644 --- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir +++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill-inspect-subrange.mir @@ -27,11 +27,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr5 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) - ; CHECK-NEXT: SI_SPILL_S512_SAVE killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 @@ -39,15 +37,14 @@ body: | ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.4, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_1:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: {{ $}} @@ -55,33 +52,37 @@ body: | ; CHECK-NEXT: SI_RETURN ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr12 = IMPLICIT_DEF + ; CHECK-NEXT: SI_SPILL_S512_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s512) into %stack.0, align 4, addrspace 5) ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_2:%[0-9]+]].sub0:vreg_96 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], undef renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF2]], undef renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_4:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, undef renamable $sgpr4_sgpr5, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = SI_SPILL_S512_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s512) from %stack.0, align 4, addrspace 5) - ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.7, implicit undef $vcc ; CHECK-NEXT: S_BRANCH %bb.6 diff --git a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir index 8ae6c27..b8818c5 100644 --- a/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/infloop-subrange-spill.mir @@ -23,13 +23,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: renamable $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF2:%[0-9]+]].sub0:vreg_64 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[DEF3:%[0-9]+]].sub1:vreg_64 = IMPLICIT_DEF ; CHECK-NEXT: dead renamable $sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: renamable $sgpr24 = IMPLICIT_DEF implicit-def $sgpr25 ; CHECK-NEXT: renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 @@ -37,47 +37,49 @@ body: | ; CHECK-NEXT: S_BRANCH %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_MOV_B64_term undef renamable $sgpr4_sgpr5 ; CHECK-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s256), addrspace 4) - ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_1:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: SI_RETURN ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: renamable $sgpr12 = IMPLICIT_DEF ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_2:%[0-9]+]].sub0:vreg_96 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43, renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], undef renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, killed renamable $sgpr20_sgpr21_sgpr22_sgpr23, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = S_LOAD_DWORDX16_IMM undef renamable $sgpr4_sgpr5, 0, 0 :: (invariant load (s512), align 32, addrspace 4) + ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[IMAGE_SAMPLE_LZ_V1_V2_3:%[0-9]+]].sub0:vreg_128 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF3]], undef renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, killed renamable $sgpr12_sgpr13_sgpr14_sgpr15, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: S_BRANCH %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = IMPLICIT_DEF ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_4:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51 = COPY killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr24_sgpr25_sgpr26_sgpr27:0x000000000000000F, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x000000000000FFFF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, undef renamable $sgpr4_sgpr5, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.7, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: liveins: $sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF + ; CHECK-NEXT: liveins: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19:0x000000000000FFFF, $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51:0x00000000FFFFFFFF ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX8_IMM undef renamable $sgpr4_sgpr5, 32, 0 :: (invariant load (s256), addrspace 4) - ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], killed renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19, undef renamable $sgpr24_sgpr25_sgpr26_sgpr27, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) + ; CHECK-NEXT: dead [[IMAGE_SAMPLE_LZ_V1_V2_5:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_LZ_V1_V2 undef [[DEF]], renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51, undef renamable $sgpr8_sgpr9_sgpr10_sgpr11, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) ; CHECK-NEXT: renamable $sgpr25 = COPY undef renamable $sgpr24, implicit-def $sgpr24 ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.6, implicit undef $vcc ; CHECK-NEXT: S_BRANCH %bb.5 diff --git a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll index 3149d7c..93eb928 100644 --- a/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll +++ b/llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll @@ -65,9 +65,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 { ; CHECK-NEXT: buffer_store_dword v10, v0, s[0:3], s33 offen ; 4-byte Folded Spill ; CHECK-NEXT: s_cbranch_scc1 .LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %store +; CHECK-NEXT: s_add_i32 s4, s33, 0x100000 +; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s4 ; 4-byte Folded Reload ; CHECK-NEXT: ; implicit-def: $sgpr4 -; CHECK-NEXT: s_add_i32 s5, s33, 0x100000 -; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s5 ; 4-byte Folded Reload ; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: ds_write_b32 v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/merge-m0.mir b/llvm/test/CodeGen/AMDGPU/merge-m0.mir index 19a2652..614ee67 100644 --- a/llvm/test/CodeGen/AMDGPU/merge-m0.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-m0.mir @@ -2,9 +2,9 @@ # GCN-LABEL: name: merge-m0-many-init # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN-NEXT: DS_WRITE_B32 # GCN-NEXT: SI_INIT_M0 65536 @@ -128,9 +128,9 @@ body: | # GCN-LABEL: name: merge-m0-dont-hoist-past-init-with-different-initializer # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 65536 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 65536 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -187,7 +187,7 @@ body: | # GCN: bb.0.entry: # GCN-NOT: SI_INIT_M0 # GCN: S_OR_B64 -# GCN: SI_INIT_M0 +# GCN-NEXT: SI_INIT_M0 # GCN: bb.1: # GCN-NOT: SI_INIT_M0 -1 @@ -259,8 +259,8 @@ body: | ... # GCN-LABEL: name: move-m0-different-initializer -# GCN: %1:vgpr_32 = IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF # GCN: SI_INIT_M0 65536 # GCN-NEXT: S_NOP --- @@ -299,10 +299,10 @@ body: | # GCN-LABEL: name: m0-in-loop-0 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -343,10 +343,10 @@ body: | # GCN-LABEL: name: m0-in-loop-1 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -384,10 +384,10 @@ body: | # GCN-LABEL: name: m0-in-loop-2 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -429,10 +429,10 @@ body: | # GCN-LABEL: name: m0-in-loop-3 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -477,10 +477,10 @@ body: | # GCN-LABEL: name: m0-in-loop-4 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -525,10 +525,10 @@ body: | # GCN-LABEL: name: m0-in-loop-5 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -574,10 +574,10 @@ body: | # GCN-LABEL: name: m0-in-loop-6 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: @@ -627,10 +627,10 @@ body: | # GCN-LABEL: name: m0-in-loop-7 # GCN: bb.0.entry: -# GCN: IMPLICIT_DEF +# GCN: SI_INIT_M0 -1 +# GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF # GCN-NEXT: IMPLICIT_DEF -# GCN-NEXT: SI_INIT_M0 -1 # GCN-NEXT: DS_WRITE_B32 # GCN: bb.1: |