aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jay Foad <jay.foad@amd.com> 2023-04-17 11:06:06 +0100
committer: Jay Foad <jay.foad@amd.com> 2023-04-27 09:40:06 +0100
commit: 47d3cbcf842a036c20c3f1c74255cdfc213f41c2 (patch)
tree: 7954d0895359047d2b4e4ee369941f2610227169
parent: 12b70ad68ccf3e9415cb6c42f8eb73e883f159ae (diff)
download: llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.zip
llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.tar.gz
llvm-47d3cbcf842a036c20c3f1c74255cdfc213f41c2.tar.bz2
[BranchFolder] Skip redundant IMPLICIT_DEFs of subregs
Differential Revision: https://reviews.llvm.org/D148509
-rw-r--r-- llvm/lib/CodeGen/BranchFolding.cpp 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll 14
2 files changed, 8 insertions, 14 deletions
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 1b78f50..a5302a6 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -860,6 +860,14 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
for (Register Reg : NewLiveIns) {
if (!LiveRegs.available(*MRI, Reg))
continue;
+
+ // Skip the register if we are about to add one of its super registers.
+ // TODO: Common this up with the same logic in addLiveIns().
+ if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) {
+ return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg);
+ }))
+ continue;
+
DebugLoc DL;
BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
Reg);
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 6d0dad2..c64ed7f 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -37,20 +37,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr30_sgpr31, implicit-def dead $scc
; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr24_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr24_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF
- ; GFX90A-NEXT: $agpr0_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $agpr0_hi16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr26_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr26_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr26 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr20_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr20_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr22_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr22_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.58, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.2:
@@ -583,11 +573,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr63, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc
; GFX90A-NEXT: $agpr0 = IMPLICIT_DEF
- ; GFX90A-NEXT: $agpr0_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $agpr0_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr14_lo16 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr14_hi16 = IMPLICIT_DEF
; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.48, implicit $vcc
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: bb.44: