author     Jeffrey Byrnes <jeffrey.byrnes@amd.com>    2025-07-17 02:09:52 -0700
committer  GitHub <noreply@github.com>                2025-07-17 18:09:52 +0900
commit     b291d1a71f39eb14b89b6aeccfc10bcd3c92a1ef (patch)
tree       749dd0509d8a3f4aa16423c6bb12eac02c621435
parent     3dc5d687b09af5568e9bd80160addb550a46e341 (diff)
[TII] Do not fold undef copies (#147392)
RegAllocBase::cleanupFailedVReg hacks up the liveness state in order to
facilitate producing valid IR. During this process, we may end up producing
undef copies.

If the destination of such a copy is a spill candidate, we will attempt to
fold the source register when issuing the spill. The undef flag on the source
is not propagated to storeRegToStackSlot, so we end up dropping the undef,
issuing a real spill, and producing an illegal liveness state.

This patch checks for undef copies and, if one is found, inserts a KILL
instead of a spill.
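As a rough sketch of the failure mode (the registers and stack slot are taken
from the AMDGPU test added below; the "before" spill instruction is a
hypothetical illustration of the old fold, not output quoted from the
compiler):

    ; RegAllocBase::cleanupFailedVReg can leave behind an undef copy like this:
    %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3

    ; Before this patch, folding a spill of %30 dropped the undef flag and
    ; stored from a register with no live definition (hypothetical output):
    SI_SPILL_AV128_SAVE $vgpr0_vgpr1_vgpr2_vgpr3, %stack.2, $sgpr32, 0, implicit $exec

    ; After this patch, the fold emits a KILL and no spill code:
    KILL undef $vgpr0_vgpr1_vgpr2_vgpr3

The new test checks exactly this: the verifier does not complain about the
LiveRange of $vgpr0_vgpr1_vgpr2_vgpr3, and no spill code is emitted for %30.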
-rw-r--r--  llvm/lib/CodeGen/TargetInstrInfo.cpp                   | 16
-rw-r--r--  llvm/test/CodeGen/AMDGPU/regalloc-undef-copy-fold.mir  | 79
2 files changed, 90 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 518a933..18d6bbc 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -792,12 +792,18 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
   const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
   MachineBasicBlock::iterator Pos = MI;
-
-  if (Flags == MachineMemOperand::MOStore)
-    storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
-                        Register());
-  else
+  if (Flags == MachineMemOperand::MOStore) {
+    if (MO.isUndef()) {
+      // If this is an undef copy, we do not need to bother with inserting
+      // spill code.
+      BuildMI(*MBB, Pos, MI.getDebugLoc(), get(TargetOpcode::KILL)).add(MO);
+    } else {
+      storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI,
+                          Register());
+    }
+  } else
     loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI, Register());
+
   return &*--Pos;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-undef-copy-fold.mir b/llvm/test/CodeGen/AMDGPU/regalloc-undef-copy-fold.mir
new file mode 100644
index 0000000..b416c96
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-undef-copy-fold.mir
@@ -0,0 +1,79 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs --start-before=greedy,2 --stop-after=greedy,2 %s -o - | FileCheck %s
+
+# Make sure there's no machine verifier error
+
+# If RA is unable to find a register to allocate, then cleanupFailedVReg will do ad-hoc rewriting and will insert undefs to make the LiveRanges workable.
+# %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3 is an example of such a rewrite / undef. If we want to spill %30, we should not be inserting
+# actual spill code, as the source operand is undef.
+# Check that there are no verifier issues with the LiveRange of $vgpr0_vgpr1_vgpr2_vgpr3 / that we do not insert spill code for %30.
+
+
+--- |
+  define void @foo() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+
+...
+
+---
+name: foo
+tracksRegLiveness: true
+stack:
+  - { id: 0, type: spill-slot, size: 32, alignment: 4 }
+machineFunctionInfo:
+  maxKernArgAlign: 4
+  isEntryFunction: true
+  waveLimiter: true
+  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
+  stackPtrOffsetReg: '$sgpr32'
+  frameOffsetReg: '$sgpr33'
+  hasSpilledVGPRs: true
+  argumentInfo:
+    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+    dispatchPtr: { reg: '$sgpr4_sgpr5' }
+    kernargSegmentPtr: { reg: '$sgpr6_sgpr7' }
+    workGroupIDX: { reg: '$sgpr8' }
+    privateSegmentWaveByteOffset: { reg: '$sgpr9' }
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: foo
+    ; CHECK: INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10 /* regdef */, def %10, 10 /* regdef */, def %1, 10 /* regdef */, def %2, 10 /* regdef */, def $vgpr0_vgpr1_vgpr2_vgpr3, 10 /* regdef */, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; CHECK-NEXT: KILL undef $vgpr0_vgpr1_vgpr2_vgpr3
+    ; CHECK-NEXT: SI_SPILL_AV160_SAVE %2, %stack.1, $sgpr32, 0, implicit $exec :: (store (s160) into %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: SI_SPILL_AV256_SAVE %1, %stack.3, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.3, align 4, addrspace 5)
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_512 = COPY %10
+    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[COPY1]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.6, align 4, addrspace 5)
+    ; CHECK-NEXT: INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    ; CHECK-NEXT: [[SI_SPILL_V512_RESTORE:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.6, align 4, addrspace 5)
+    ; CHECK-NEXT: $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY [[SI_SPILL_V512_RESTORE]]
+    ; CHECK-NEXT: [[SI_SPILL_V512_RESTORE1:%[0-9]+]]:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SI_SPILL_V512_SAVE [[SI_SPILL_V512_RESTORE1]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.4, align 4, addrspace 5)
+    ; CHECK-NEXT: [[SI_SPILL_AV256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_AV256_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.3, align 4, addrspace 5)
+    ; CHECK-NEXT: SI_SPILL_V256_SAVE [[SI_SPILL_AV256_RESTORE]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s256) into %stack.5, align 4, addrspace 5)
+    ; CHECK-NEXT: [[SI_SPILL_AV160_RESTORE:%[0-9]+]]:vreg_160 = SI_SPILL_AV160_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s160) from %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_AV128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: [[SI_SPILL_AV512_RESTORE:%[0-9]+]]:av_512 = SI_SPILL_AV512_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.4, align 4, addrspace 5)
+    ; CHECK-NEXT: [[SI_SPILL_V256_RESTORE:%[0-9]+]]:vreg_256 = SI_SPILL_V256_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s256) from %stack.5, align 4, addrspace 5)
+    ; CHECK-NEXT: INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9 /* reguse */, [[SI_SPILL_AV512_RESTORE]], 9 /* reguse */, [[SI_SPILL_V256_RESTORE]], 9 /* reguse */, [[SI_SPILL_AV160_RESTORE]], 9 /* reguse */, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9 /* reguse */, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
+    ; CHECK-NEXT: SI_RETURN
+    INLINEASM &"; def $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 10, def %22:vreg_512, 10, def %25:vreg_256, 10, def %28:vreg_160, 10, def $vgpr0_vgpr1_vgpr2_vgpr3, 10, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    %30:av_128 = COPY undef $vgpr0_vgpr1_vgpr2_vgpr3
+    %27:av_160 = COPY %28:vreg_160
+    %24:av_256 = COPY %25:vreg_256
+    SI_SPILL_V512_SAVE %22:vreg_512, %stack.0, $sgpr32, 0, implicit $exec :: (store (s512) into %stack.0, align 4, addrspace 5)
+    %18:vreg_512 = COPY $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
+    INLINEASM &"; clobber", 1 /* sideeffect attdialect */, 10, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
+    $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16 = COPY %18:vreg_512
+    %23:vreg_512 = SI_SPILL_V512_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s512) from %stack.0, align 4, addrspace 5)
+    %26:vreg_256 = COPY %24:av_256
+    %29:vreg_160 = COPY %27:av_160
+    $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %30:av_128
+    INLINEASM &"; use $0 $1 $2 $3 $4", 1 /* sideeffect attdialect */, 9, %23:vreg_512, 9, %26:vreg_256, 9, %29:vreg_160, 9, undef $vgpr0_vgpr1_vgpr2_vgpr3, 9, $agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16
+    SI_RETURN
+
+...