diff options
author | macurtis-amd <macurtis@amd.com> | 2025-07-16 06:37:08 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-07-16 06:37:08 -0500 |
commit | 402b989693a0d5d17be6bf996bce52cf3ca73886 (patch) | |
tree | e4bf3ff6c0e7ca5c5e681203a9cb84e3cf029fd0 | |
parent | 3b8a18c27a1e70895feac15d48b3a6122e6b377f (diff) | |
download | llvm-402b989693a0d5d17be6bf996bce52cf3ca73886.zip llvm-402b989693a0d5d17be6bf996bce52cf3ca73886.tar.gz llvm-402b989693a0d5d17be6bf996bce52cf3ca73886.tar.bz2 |
AMDGPU: Fix assert when multi operands to update after folding imm (#148205)
In the original motivating test case,
[FoldList](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1764)
had entries:
```
#0: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc
UseOpNo: 1
#1: UseMI: %224:sreg_32 = S_OR_B32 %219.sub0:sreg_64, %219.sub1:sreg_64, implicit-def dead $scc
UseOpNo: 2
```
After calling
[updateOperand(#0)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1773),
[tryConstantFoldOp(#0.UseMI)](https://github.com/llvm/llvm-project/blob/d8a2141ff98ee35cd1886f536ccc3548b012820b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp#L1786)
removed operand 1, so entry #1.UseOpNo was no longer valid,
resulting in an
[assertion failure](https://github.com/llvm/llvm-project/blob/4a35214bddbb67f9597a500d48ab8c4fb25af150/llvm/include/llvm/ADT/ArrayRef.h#L452).
This change defers constant folding until all operands have been updated
so that UseOpNo values remain stable.
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir | 15 |
2 files changed, 25 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 0ed06c3..e172c0b 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1761,6 +1761,7 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, for (MachineInstr *Copy : CopiesToReplace) Copy->addImplicitDefUseOperands(*MF); + SetVector<MachineInstr *> ConstantFoldCandidates; for (FoldCandidate &Fold : FoldList) { assert(!Fold.isReg() || Fold.Def.OpToFold); if (Fold.isReg() && Fold.getReg().isVirtual()) { @@ -1783,16 +1784,21 @@ bool SIFoldOperandsImpl::foldInstOperand(MachineInstr &MI, << static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI); - if (Fold.isImm() && tryConstantFoldOp(Fold.UseMI)) { - LLVM_DEBUG(dbgs() << "Constant folded " << *Fold.UseMI); - Changed = true; - } + if (Fold.isImm()) + ConstantFoldCandidates.insert(Fold.UseMI); } else if (Fold.Commuted) { // Restoring instruction's original operand order if fold has failed. 
TII->commuteInstruction(*Fold.UseMI, false); } } + + for (MachineInstr *MI : ConstantFoldCandidates) { + if (tryConstantFoldOp(MI)) { + LLVM_DEBUG(dbgs() << "Constant folded " << *MI); + Changed = true; + } + } return true; } diff --git a/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir b/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir new file mode 100644 index 0000000..d0c9740 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/bug-multi-operands-to-update-after-fold.mir @@ -0,0 +1,15 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx1031 -run-pass=si-fold-operands -o - %s | FileCheck %s +--- +name: snork +body: | + bb.0: + ; CHECK-LABEL: name: snork + ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_]], %subreg.sub3 + ; CHECK-NEXT: SI_RETURN + %0:sreg_32 = S_MOV_B32 0 + %1:sgpr_128 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1, %0, %subreg.sub2, %0, %subreg.sub3 + %2:sreg_32 = S_OR_B32 %1.sub0, %1.sub3, implicit-def dead $scc + SI_RETURN +... |