diff options
author | Jay Foad <jay.foad@amd.com> | 2023-04-17 11:06:06 +0100 |
---|---|---|
committer | Jay Foad <jay.foad@amd.com> | 2023-04-27 09:40:06 +0100 |
commit | 47d3cbcf842a036c20c3f1c74255cdfc213f41c2 (patch) | |
tree | 7954d0895359047d2b4e4ee369941f2610227169 | |
parent | 12b70ad68ccf3e9415cb6c42f8eb73e883f159ae (diff) | |
download | llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.zip llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.tar.gz llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.tar.bz2 |
[BranchFolder] Skip redundant IMPLICIT_DEFs of subregs
Differential Revision: https://reviews.llvm.org/D148509
-rw-r--r-- | llvm/lib/CodeGen/BranchFolding.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll | 14 |
2 files changed, 8 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 1b78f50..a5302a6 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -860,6 +860,14 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) { for (Register Reg : NewLiveIns) { if (!LiveRegs.available(*MRI, Reg)) continue; + + // Skip the register if we are about to add one of its super registers. + // TODO: Common this up with the same logic in addLiveIns(). + if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) { + return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg); + })) + continue; + DebugLoc DL; BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg); diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index 6d0dad2..c64ed7f 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -37,20 +37,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc ; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr24_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr24_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF - ; GFX90A-NEXT: $agpr0_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $agpr0_hi16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr26_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr26_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr26 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr20_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr20_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr22_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr22_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.58, implicit $vcc ; GFX90A-NEXT: {{ 
$}} ; GFX90A-NEXT: bb.2: @@ -583,11 +573,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc ; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF - ; GFX90A-NEXT: $agpr0_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $agpr0_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: $vgpr14 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr14_lo16 = IMPLICIT_DEF - ; GFX90A-NEXT: $vgpr14_hi16 = IMPLICIT_DEF ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.44: |