diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2024-09-06 18:18:27 +0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-06 18:18:27 +0400 |
commit | a9daad8280c081ee15c16cf8515630816695fb0e (patch) | |
tree | 7e53c05dc3b812fad3e9c3d401fa22ad609c5e19 | |
parent | 6ab5829ab7f03417ccb13e75d68b241871701be1 (diff) | |
download | llvm-a9daad8280c081ee15c16cf8515630816695fb0e.zip llvm-a9daad8280c081ee15c16cf8515630816695fb0e.tar.gz llvm-a9daad8280c081ee15c16cf8515630816695fb0e.tar.bz2 |
AMDGPU: Update live intervals in convertToThreeAddress (#104610)
Fixes #98741
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 40 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir | 142 |
2 files changed, 156 insertions, 26 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 90e11df..c6f28af 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4059,17 +4059,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, !RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { MachineInstr *DefMI; const auto killDef = [&]() -> void { - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); // The only user is the instruction which will be killed. Register DefReg = DefMI->getOperand(0).getReg(); - if (!MRI.hasOneNonDBGUse(DefReg)) - return; - // We cannot just remove the DefMI here, calling pass will crash. - DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); - for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) - DefMI->removeOperand(I); - if (LV) - LV->getVarInfo(DefReg).AliveBlocks.clear(); + + if (MRI.hasOneNonDBGUse(DefReg)) { + // We cannot just remove the DefMI here, calling pass will crash. + DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); + DefMI->getOperand(0).setIsDead(true); + for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) + DefMI->removeOperand(I); + if (LV) + LV->getVarInfo(DefReg).AliveBlocks.clear(); + } + + if (LIS) { + LiveInterval &DefLI = LIS->getInterval(DefReg); + + // We cannot delete the original instruction here, so hack out the use + // in the original instruction with a dummy register so we can use + // shrinkToUses to deal with any multi-use edge cases. Other targets do + // not have the complexity of deleting a use to consider here. + Register DummyReg = MRI.cloneVirtualRegister(DefReg); + for (MachineOperand &MIOp : MI.uses()) { + if (MIOp.isReg() && MIOp.getReg() == DefReg) { + MIOp.setIsUndef(true); + MIOp.setReg(DummyReg); + } + } + + LIS->shrinkToUses(&DefLI); + } }; int64_t Imm; @@ -4107,6 +4127,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src2) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); + if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); killDef(); @@ -4129,6 +4150,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, .add(*Src2) .setMIFlags(MI.getFlags()); updateLiveVariables(LV, MI, *MIB); + if (LIS) LIS->ReplaceMachineInstrInMaps(MI, *MIB); if (DefMI) diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir index 1768e39..f814dd3 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir @@ -1,33 +1,138 @@ -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s + # GFX10-LABEL: name: test_fmamk_reg_imm_f16 -# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_reg_imm_f16 -registers: - - { id: 0, class: vreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: vgpr_32 } +tracksRegLiveness: true body: | bb.0: - %0 = IMPLICIT_DEF - %1 = COPY %0.sub1 - %2 = V_MOV_B32_e32 1078523331, implicit $exec - %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__imm_is_subreg +tracksRegLiveness: true +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec +# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e +# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e +--- +name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined +tracksRegLiveness: true +body: | + bb.0: + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec + %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: S_NOP 0, implicit %2 +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__use_imm_before_mac +tracksRegLiveness: true +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + S_NOP 0, implicit %2 + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec +--- +name: test_fmamk_reg_imm_f16__use_imm_after_mac +tracksRegLiveness: true +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + S_NOP 0, implicit %2 + +... + +# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac +# GFX10: %0:vreg_64 = IMPLICIT_DEF +# GFX10: %1:vgpr_32 = COPY %0.sub1 +# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec +# GFX10: S_NOP 0, implicit %2 +# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec +# GFX10: S_NOP 0, implicit %2 + +--- +name: test_fmamk_reg_imm_f16__use_imm_before_after_mac +tracksRegLiveness: true +body: | + bb.0: + + %0:vreg_64 = IMPLICIT_DEF + %1:vgpr_32 = COPY %0.sub1 + %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec + S_NOP 0, implicit %2 + %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec + S_NOP 0, implicit %2 ... # GFX10-LABEL: name: test_fmamk_imm_reg_f16 -# GFX10: %2:vgpr_32 = IMPLICIT_DEF +# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec --- name: test_fmamk_imm_reg_f16 +tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: vgpr_32 } @@ -46,9 +151,11 @@ body: | # GFX10-LABEL: name: test_fmaak_f16 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec # GFX10-NOT: V_MOV_B32 -# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec +# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec --- name: test_fmaak_f16 +tracksRegLiveness: true registers: - { id: 0, class: vreg_64 } - { id: 1, class: vgpr_32 } @@ -65,7 +172,8 @@ body: | # GFX10-LABEL: name: test_fmaak_inline_literal_f16 # GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec # GFX10-NOT: V_MOV_B32 -# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec +# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec +# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec --- name: test_fmaak_inline_literal_f16 |