author     macurtis-amd <macurtis@amd.com>   2025-08-01 11:54:25 -0500
committer  GitHub <noreply@github.com>       2025-08-01 11:54:25 -0500
commit     d28bb1fd786677fca73390a57972d3c722c30dcd
tree       5509c3952b801fb46796804d534dd9d57ffd2e11
parent     a7425f900f6467dcbc7847ba0d80382e346a1030
[AMDGPU] Ensure non-reserved CSR spilled regs are live-in (#146427)
Fixes:
```
*** Bad machine code: Using an undefined physical register ***
- function: widget
- basic block: %bb.0 bb (0x564092cbe140)
- instruction: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
- operand 1: killed $agpr13
LLVM ERROR: Found 1 machine code errors.
```
The detailed sequence of events that led to this error:
1. The MachineVerifier fails because `$agpr13` is not defined at its use on
line 19 below; its only def is the `COPY` on line 34 (a sketch of the
invariant being violated follows the listing):
```
1: bb.0.bb:
2: successors: %bb.1(0x80000000); %bb.1(100.00%)
3: liveins: $agpr14, $agpr15, $sgpr12, $sgpr13, $sgpr14, \
4: $sgpr15, $sgpr30, $sgpr31, $sgpr34, $sgpr35, \
5: $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr48, \
6: $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, \
7: $sgpr54, $sgpr55, $sgpr64, $sgpr65, $sgpr66, \
8: $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, \
9: $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, \
10: $sgpr85, $sgpr86, $sgpr87, $sgpr96, $sgpr97, \
11: $sgpr98, $sgpr99, $vgpr0, $vgpr31, $vgpr40, $vgpr41, \
12: $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, \
13: $sgpr10_sgpr11
14: $sgpr16 = COPY $sgpr33
15: $sgpr33 = frame-setup COPY $sgpr32
16: $sgpr18_sgpr19 = S_XOR_SAVEEXEC_B64 -1, \
17: implicit-def $exec, implicit-def dead $scc, \
18: implicit $exec
19: $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr13, \
20: implicit $exec
21: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, \
22: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, \
23: implicit $exec :: (store (s32) into %stack.38, \
24: addrspace 5)
25: ...
26: $vgpr43 = IMPLICIT_DEF
27: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, \
28: killed $vgpr43(tied-def 0)
29: $vgpr43 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, \
30: killed $vgpr43(tied-def 0)
31: $sgpr100_sgpr101 = S_OR_SAVEEXEC_B64 -1, \
32: implicit-def $exec, implicit-def dead $scc, \
33: implicit $exec
34: renamable $agpr13 = COPY killed $vgpr43, implicit $exec
```
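For reference, the invariant the verifier enforces here: a physical register
read in a block must either be defined earlier in that block or be listed in
the block's live-ins, and `$agpr13` is neither at line 19. The sketch below
illustrates that rule using public `MachineBasicBlock`/`MachineInstr` APIs;
the helper name is made up for illustration and this is not the actual
MachineVerifier code.

```cpp
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Hypothetical helper (not the real MachineVerifier logic): a physical
// register read by UseMI is only well-defined if some earlier instruction in
// the block defines it, or the block declares it as a live-in.
static bool isPhysRegUseCovered(const MachineBasicBlock &MBB,
                                const MachineInstr &UseMI, MCRegister Reg,
                                const TargetRegisterInfo &TRI) {
  for (const MachineInstr &MI : MBB) {
    if (&MI == &UseMI)
      break;                        // Reached the use without seeing a def.
    if (MI.definesRegister(Reg, &TRI))
      return true;                  // Defined earlier in this block.
  }
  // No def before the use: the value must flow in from outside the block.
  return MBB.isLiveIn(Reg);         // False for $agpr13 in bb.0 above.
}
```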
2. The spill of `$agpr13` on lines 19-24 above is created by
[`emitCSRSpillStores`](https://github.com/llvm/llvm-project/blob/d599bdeaa49d7a2b1246328630328d23ddda5a47/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp#L977)
(called from
[`SIFrameLowering::emitPrologue`](https://github.com/llvm/llvm-project/blob/d599bdeaa49d7a2b1246328630328d23ddda5a47/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp#L1122))
because `$agpr13` is in `WWMSpills`. See lines 982, 998, and 993 below; a
sketch of the missing live-in bookkeeping follows the snippet:
```
977:   // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
978:   // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
979:   // might end up flipping the EXEC bits twice.
980:   Register ScratchExecCopy;
981:   SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
982:   FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
983:   if (!WWMScratchRegs.empty())
984:     ScratchExecCopy =
985:         buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
986:                              /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
987:
988:   auto StoreWWMRegisters =
989:       [&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
990:         for (const auto &Reg : WWMRegs) {
991:           Register VGPR = Reg.first;
992:           int FI = Reg.second;
993:           buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
994:                            VGPR, FI, FrameReg);
995:         }
996:       };
997:
998:   StoreWWMRegisters(WWMScratchRegs);
```
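`buildPrologSpill` reads each of those registers (for an AGPR slot this shows
up as the `V_ACCVGPR_READ_B32_e64` / `BUFFER_STORE_DWORD_OFFSET` pair on
lines 19-24 above), yet nothing declares them as live into the function or
the entry block. The snippet below is a standalone sketch of the idea behind
the fix; it wraps the same LLVM calls the patch uses in a hypothetical helper
introduced only for illustration.

```cpp
#include <utility>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper for illustration: before the prologue stores the WWM
// scratch registers, declare every non-reserved one as a live-in of both the
// function and the entry block, so the verifier and later passes know the
// value arrives from outside the function body.
static void declareWWMScratchLiveIns(
    MachineFunction &MF, MachineBasicBlock &EntryMBB,
    ArrayRef<std::pair<Register, int>> WWMScratchRegs) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  for (const Register Reg : make_first_range(WWMScratchRegs)) {
    if (MRI.isReserved(Reg))
      continue;                // Reserved registers are not tracked this way.
    MRI.addLiveIn(Reg);        // Function-level live-in list.
    EntryMBB.addLiveIn(Reg);   // Block-level live-in list (what bb.0 lacked).
  }
}
```

The patch applies this directly inside `emitCSRSpillStores`, immediately
before the `StoreWWMRegisters(WWMScratchRegs)` call shown above.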
3. `$agpr13` was added to `WWMSpills` by
[`SILowerWWMCopies::run`](https://github.com/llvm/llvm-project/blob/59a7185dd9d69cbf737a98f5c2d1cf3d456bee03/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp#L137)
as it processed the `WWM_COPY` on line 3 below (which corresponds to line 34
above in point 1); a sketch of the invariant the fix establishes follows the
snippet:
```
1: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, %45:vgpr_32(tied-def 0)
2: %45:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr14, 1, %45:vgpr_32(tied-def 0)
3: %44:av_32 = WWM_COPY %45:vgpr_32
```
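Putting the three steps together: `SILowerWWMCopies` picks a WWM register for
the copy's result, `emitCSRSpillStores` later spills that register in the
prologue, but the entry block never learns that the register is live on
function entry. After the fix, the post-condition below holds for the entry
block; this is a hypothetical check written only to state that condition and
is not part of the patch.

```cpp
#include <utility>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical post-condition of the fix: every non-reserved WWM scratch
// register that the prologue will read is a live-in of the entry block.
static bool wwmScratchRegsAreLiveIn(
    const MachineFunction &MF,
    ArrayRef<std::pair<Register, int>> WWMScratchRegs) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineBasicBlock &Entry = MF.front();
  for (const auto &[Reg, FrameIdx] : WWMScratchRegs) {
    (void)FrameIdx; // The frame index is irrelevant to liveness.
    if (!MRI.isReserved(Reg) && !Entry.isLiveIn(Reg))
      return false; // e.g. $agpr13 before the fix.
  }
  return true;
}
```

In the regenerated test below, the effect is visible as `$agpr0` appearing in
the `liveins:` list of `bb.0`.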
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp          |  7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir | 97
2 files changed, 104 insertions, 0 deletions
```
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 11552b3..9b348d4 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -983,6 +983,7 @@ void SIFrameLowering::emitCSRSpillStores(
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   const SIInstrInfo *TII = ST.getInstrInfo();
   const SIRegisterInfo &TRI = TII->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
 
   // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
   // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1005,6 +1006,12 @@ void SIFrameLowering::emitCSRSpillStores(
     }
   };
 
+  for (const Register Reg : make_first_range(WWMScratchRegs)) {
+    if (!MRI.isReserved(Reg)) {
+      MRI.addLiveIn(Reg);
+      MBB.addLiveIn(Reg);
+    }
+  }
   StoreWWMRegisters(WWMScratchRegs);
 
   auto EnableAllLanes = [&]() {
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
new file mode 100644
index 0000000..7336a54
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
@@ -0,0 +1,97 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -run-pass=si-lower-sgpr-spills,greedy,si-lower-wwm-copies,virtregrewriter,prologepilog -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: widget
+tracksRegLiveness: true
+frameInfo:
+  adjustsStack: true
+stack:
+  - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+  - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+  hasSpilledSGPRs: true
+  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+  stackPtrOffsetReg: '$sgpr32'
+body: |
+  ; GCN-LABEL: name: widget
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.1(0x80000000)
+  ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $agpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT: $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
+  ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT: $exec = S_MOV_B64 -1
+  ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
+  ; GCN-NEXT: $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
+  ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT: renamable $agpr0 = COPY killed renamable $vgpr62
+  ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
+  ; GCN-NEXT: renamable $vgpr62 = IMPLICIT_DEF
+  ; GCN-NEXT: dead renamable $vgpr62 = V_AND_B32_e32 1, killed $vgpr62, implicit $exec
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: liveins: $agpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.3
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+  ; GCN-NEXT: liveins: $agpr0, $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT: S_BRANCH %bb.4
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.3:
+  ; GCN-NEXT: successors: %bb.2(0x80000000)
+  ; GCN-NEXT: liveins: $agpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: $noreg = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT: renamable $vgpr62 = COPY renamable $agpr0
+  ; GCN-NEXT: $exec = S_MOV_B64 killed $noreg
+  ; GCN-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr62, 1
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.4:
+  ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+  ; GCN-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
+  ; GCN-NEXT: $exec = S_MOV_B64 -1
+  ; GCN-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
+  ; GCN-NEXT: SI_RETURN
+  bb.0:
+    liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15
+
+    %45:vgpr_32 = IMPLICIT_DEF
+    SI_SPILL_S32_SAVE $sgpr15, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+    %16:vgpr_32 = V_AND_B32_e32 1, %45, implicit $exec
+
+  bb.1:
+    successors: %bb.3, %bb.2
+
+    S_CBRANCH_EXECZ %bb.2, implicit $exec
+    S_BRANCH %bb.3
+
+  bb.2:
+    successors: %bb.4(0x04000000), %bb.1(0x7c000000)
+    liveins: $sgpr86, $sgpr87, $sgpr66_sgpr67, $sgpr68_sgpr69, $sgpr70_sgpr71, $sgpr80_sgpr81, $sgpr82_sgpr83, $sgpr84_sgpr85, $sgpr96_sgpr97, $sgpr98_sgpr99
+
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.4
+
+  bb.3:
+    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    $sgpr14 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+    ADJCALLSTACKDOWN 0, 28, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
+    S_BRANCH %bb.2
+
+  bb.4:
+    SI_RETURN
+
+...
```