diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2025-09-23 21:32:23 +0900 |
---|---|---|
committer | Matt Arsenault <arsenm2@gmail.com> | 2025-09-27 00:47:59 +0900 |
commit | 205e4cde0fffb25a80c24de26a4667171e6a568f (patch) | |
tree | 4a675b5f5b7c14a6380f5ef932768742e6961948 | |
parent | 6d1c19bb1926fae44619a3531b2e0d0b1259cd25 (diff) | |
download | llvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.zip llvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.tar.gz llvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.tar.bz2 |
RegAllocGreedy: Check if copied lanes are live in trySplitAroundHintReg (branch: users/arsenm/greedy/trySplitAroundHintReg-subreg)
For subregister copies, do a subregister live check instead of checking
the main range. This doesn't do much yet; the split analysis still does
not track live ranges.
-rw-r--r-- | llvm/lib/CodeGen/RegAllocGreedy.cpp | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/SystemZ/fp-cmp-04.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fshl.ll | 81 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fshr.ll | 90 |
4 files changed, 109 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 8e6cf3e..6e0585b 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -1406,8 +1406,28 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint, continue; // Check if VirtReg interferes with OtherReg after this COPY instruction. - if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot())) - continue; + if (Opnd.readsReg()) { + SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot(); + + if (SubReg) { + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + if (IsDef) + Mask = ~Mask; + + if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) { + if ((S.LaneMask & Mask).any()) { + if (S.liveAt(Index)) + return true; + } + return false; + })) { + continue; + } + } else { + if (VirtReg.liveAt(Index)) + continue; + } + } MCRegister OtherPhysReg = OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg); diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll index d3d6413..eb7c1b6 100644 --- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll +++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll @@ -235,7 +235,7 @@ define half @f12_half(half %dummy, half %val, ptr %dest) { ; CHECK-NEXT: blah %f0 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT -; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: ltebr %f1, %f0 ; CHECK-NEXT: jl .LBB11_2 ; CHECK-NEXT:# %bb.1: ; CHECK-NEXT: lgdr %r0, %f8 @@ -344,7 +344,7 @@ define half @f15_half(half %val, half %dummy, ptr %dest) { ; CHECK-NEXT: blah %f2 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT -; CHECK-NEXT: ltebr %f0, %f0 +; CHECK-NEXT: ltebr %f1, %f0 ; CHECK-NEXT: jl .LBB15_2 ; CHECK-NEXT:# %bb.1: ; CHECK-NEXT: lgdr %r0, %f8 diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll index ec1b8a3..f998128 100644 --- a/llvm/test/CodeGen/X86/fshl.ll +++ b/llvm/test/CodeGen/X86/fshl.ll @@ 
-335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: pushl %esi ; X86-SLOW-NEXT: andl $-16, %esp ; X86-SLOW-NEXT: subl $32, %esp -; X86-SLOW-NEXT: movl 24(%ebp), %esi +; X86-SLOW-NEXT: movl 24(%ebp), %edi ; X86-SLOW-NEXT: movl 28(%ebp), %eax ; X86-SLOW-NEXT: movl 48(%ebp), %edx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: testb $64, %cl -; X86-SLOW-NEXT: movl 52(%ebp), %edi +; X86-SLOW-NEXT: movl 52(%ebp), %ebx ; X86-SLOW-NEXT: jne .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %esi, %edx -; X86-SLOW-NEXT: movl 32(%ebp), %esi -; X86-SLOW-NEXT: movl %edi, %ecx -; X86-SLOW-NEXT: movl %eax, %edi +; X86-SLOW-NEXT: movl %edi, %edx +; X86-SLOW-NEXT: movl 32(%ebp), %edi +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, %ebx ; X86-SLOW-NEXT: movl 36(%ebp), %eax ; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: ; X86-SLOW-NEXT: movl 40(%ebp), %ecx ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl 44(%ebp), %ecx +; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: .LBB6_3: -; X86-SLOW-NEXT: movl 56(%ebp), %ebx -; X86-SLOW-NEXT: testb $32, %bl +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: testb $32, %cl ; X86-SLOW-NEXT: jne .LBB6_4 ; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: movl %ecx, %ebx ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %edi +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: jmp .LBB6_6 ; X86-SLOW-NEXT: .LBB6_4: -; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl 
%edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %ecx, %edx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %ebx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl %edi, %eax +; X86-SLOW-NEXT: shll %cl, %eax +; X86-SLOW-NEXT: shrl %esi +; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: notb %dl +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shrl %cl, %esi +; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ebx, %eax ; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: movl %ebx, %edi +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shll %cl, %eax ; X86-SLOW-NEXT: shrl %edi -; X86-SLOW-NEXT: movl %ecx, %ebx -; X86-SLOW-NEXT: notb %bl -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: orl %esi, %edi +; X86-SLOW-NEXT: orl %eax, %edi ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; X86-SLOW-NEXT: movl %esi, %eax ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: shrl %edx -; X86-SLOW-NEXT: movl %ebx, %ecx -; X86-SLOW-NEXT: shrl %cl, %edx -; X86-SLOW-NEXT: orl %eax, %edx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X86-SLOW-NEXT: movl %ebx, %eax +; X86-SLOW-NEXT: shrl %ebx +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl 
killed $ecx +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-SLOW-NEXT: shll %cl, %eax ; X86-SLOW-NEXT: shrl %esi -; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shrl %cl, %esi ; X86-SLOW-NEXT: orl %eax, %esi -; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-SLOW-NEXT: shll %cl, %eax -; X86-SLOW-NEXT: shrl %ebx -; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload -; X86-SLOW-NEXT: shrl %cl, %ebx -; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 8(%ebp), %eax -; X86-SLOW-NEXT: movl %ebx, 12(%eax) -; X86-SLOW-NEXT: movl %esi, 8(%eax) -; X86-SLOW-NEXT: movl %edx, 4(%eax) -; X86-SLOW-NEXT: movl %edi, (%eax) +; X86-SLOW-NEXT: movl %esi, 12(%eax) +; X86-SLOW-NEXT: movl %ebx, 8(%eax) +; X86-SLOW-NEXT: movl %edi, 4(%eax) +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, (%eax) ; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll index 544ab7f..c307833 100644 --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind { ; X86-SLOW-NEXT: subl $16, %esp ; X86-SLOW-NEXT: movl 24(%ebp), %edx ; X86-SLOW-NEXT: movl 28(%ebp), %esi -; X86-SLOW-NEXT: movl 48(%ebp), %ebx +; X86-SLOW-NEXT: movl 48(%ebp), %edi ; X86-SLOW-NEXT: movl 56(%ebp), %eax ; X86-SLOW-NEXT: testb $64, %al -; X86-SLOW-NEXT: movl 52(%ebp), %edi +; X86-SLOW-NEXT: movl 52(%ebp), %eax ; X86-SLOW-NEXT: je .LBB6_1 ; X86-SLOW-NEXT: # %bb.2: -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl %edx, %ebx +; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl %edx, %edi 
; X86-SLOW-NEXT: movl 32(%ebp), %edx -; X86-SLOW-NEXT: movl %edi, %eax -; X86-SLOW-NEXT: movl %esi, %edi +; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %esi, %eax ; X86-SLOW-NEXT: movl 36(%ebp), %esi ; X86-SLOW-NEXT: jmp .LBB6_3 ; X86-SLOW-NEXT: .LBB6_1: -; X86-SLOW-NEXT: movl 40(%ebp), %eax -; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-SLOW-NEXT: movl 44(%ebp), %eax +; X86-SLOW-NEXT: movl 40(%ebp), %ecx +; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-SLOW-NEXT: movl 44(%ebp), %ecx ; X86-SLOW-NEXT: .LBB6_3: -; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: testb $32, %cl +; X86-SLOW-NEXT: movl 56(%ebp), %ebx +; X86-SLOW-NEXT: testb $32, %bl ; X86-SLOW-NEXT: je .LBB6_4 ; X86-SLOW-NEXT: # %bb.5: -; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl %ecx, %ebx ; X86-SLOW-NEXT: jmp .LBB6_6 ; X86-SLOW-NEXT: .LBB6_4: ; X86-SLOW-NEXT: movl %edx, %esi +; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SLOW-NEXT: movl %eax, %ebx -; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, %edi +; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload ; X86-SLOW-NEXT: .LBB6_6: -; X86-SLOW-NEXT: shrl %cl, %eax -; X86-SLOW-NEXT: movl %eax, %edx -; X86-SLOW-NEXT: movl %ecx, %eax -; X86-SLOW-NEXT: notb %al -; X86-SLOW-NEXT: movl %ebx, %edi -; X86-SLOW-NEXT: addl %ebx, %ebx -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: orl %edx, %ebx -; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: movl 56(%ebp), %ecx -; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %edi -; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%ebx # 4-byte Reload -; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx -; X86-SLOW-NEXT: movl %eax, %ecx -; X86-SLOW-NEXT: shll %cl, %edx -; X86-SLOW-NEXT: orl %edi, %edx +; X86-SLOW-NEXT: shrl %cl, %ebx +; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: notb %dl +; X86-SLOW-NEXT: movl %edi, %eax +; X86-SLOW-NEXT: addl %edi, %edi +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: orl %ebx, %edi +; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-SLOW-NEXT: shrl %cl, %ebx -; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: shrl %cl, %eax ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; X86-SLOW-NEXT: leal (%edi,%edi), %ebx -; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shll %cl, %ebx -; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; X86-SLOW-NEXT: orl %eax, %ebx ; X86-SLOW-NEXT: movl 56(%ebp), %ecx ; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-SLOW-NEXT: shrl %cl, %edi +; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SLOW-NEXT: leal (%eax,%eax), %edi +; X86-SLOW-NEXT: movl %edx, %ecx +; X86-SLOW-NEXT: shll %cl, %edi +; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload +; X86-SLOW-NEXT: movl 56(%ebp), %ecx +; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-SLOW-NEXT: shrl %cl, %eax ; X86-SLOW-NEXT: addl %esi, %esi -; X86-SLOW-NEXT: movl %eax, %ecx +; X86-SLOW-NEXT: movl %edx, %ecx ; X86-SLOW-NEXT: shll %cl, %esi -; X86-SLOW-NEXT: orl %edi, %esi -; X86-SLOW-NEXT: movl 8(%ebp), %ecx -; X86-SLOW-NEXT: movl %esi, 12(%ecx) -; X86-SLOW-NEXT: movl %ebx, 8(%ecx) -; X86-SLOW-NEXT: movl %edx, 4(%ecx) -; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-SLOW-NEXT: movl %eax, (%ecx) 
-; X86-SLOW-NEXT: movl %ecx, %eax +; X86-SLOW-NEXT: orl %eax, %esi +; X86-SLOW-NEXT: movl 8(%ebp), %eax +; X86-SLOW-NEXT: movl %esi, 12(%eax) +; X86-SLOW-NEXT: movl %edi, 8(%eax) +; X86-SLOW-NEXT: movl %ebx, 4(%eax) +; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-SLOW-NEXT: movl %ecx, (%eax) ; X86-SLOW-NEXT: leal -12(%ebp), %esp ; X86-SLOW-NEXT: popl %esi ; X86-SLOW-NEXT: popl %edi |