aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2025-09-23 21:32:23 +0900
committerMatt Arsenault <arsenm2@gmail.com>2025-09-27 00:47:59 +0900
commit205e4cde0fffb25a80c24de26a4667171e6a568f (patch)
tree4a675b5f5b7c14a6380f5ef932768742e6961948
parent6d1c19bb1926fae44619a3531b2e0d0b1259cd25 (diff)
downloadllvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.zip
llvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.tar.gz
llvm-users/arsenm/greedy/trySplitAroundHintReg-subreg.tar.bz2
RegAllocGreedy: Check if copied lanes are live in trySplitAroundHintRegusers/arsenm/greedy/trySplitAroundHintReg-subreg
For subregister copies, do a subregister live check instead of checking the main range. Doesn't do much yet, the split analysis still does not track live ranges.
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp24
-rw-r--r--llvm/test/CodeGen/SystemZ/fp-cmp-04.ll4
-rw-r--r--llvm/test/CodeGen/X86/fshl.ll81
-rw-r--r--llvm/test/CodeGen/X86/fshr.ll90
4 files changed, 109 insertions, 90 deletions
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 8e6cf3e..6e0585b 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1406,8 +1406,28 @@ bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint,
continue;
// Check if VirtReg interferes with OtherReg after this COPY instruction.
- if (!IsDef && VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot()))
- continue;
+ if (Opnd.readsReg()) {
+ SlotIndex Index = LIS->getInstructionIndex(Instr).getRegSlot();
+
+ if (SubReg) {
+ LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
+ if (IsDef)
+ Mask = ~Mask;
+
+ if (any_of(VirtReg.subranges(), [=](const LiveInterval::SubRange &S) {
+ if ((S.LaneMask & Mask).any()) {
+ if (S.liveAt(Index))
+ return true;
+ }
+ return false;
+ })) {
+ continue;
+ }
+ } else {
+ if (VirtReg.liveAt(Index))
+ continue;
+ }
+ }
MCRegister OtherPhysReg =
OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg);
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
index d3d6413..eb7c1b6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-04.ll
@@ -235,7 +235,7 @@ define half @f12_half(half %dummy, half %val, ptr %dest) {
; CHECK-NEXT: blah %f0
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
@@ -344,7 +344,7 @@ define half @f15_half(half %val, half %dummy, ptr %dest) {
; CHECK-NEXT: blah %f2
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: brasl %r14, __extendhfsf2@PLT
-; CHECK-NEXT: ltebr %f0, %f0
+; CHECK-NEXT: ltebr %f1, %f0
; CHECK-NEXT: jl .LBB15_2
; CHECK-NEXT:# %bb.1:
; CHECK-NEXT: lgdr %r0, %f8
diff --git a/llvm/test/CodeGen/X86/fshl.ll b/llvm/test/CodeGen/X86/fshl.ll
index ec1b8a3..f998128 100644
--- a/llvm/test/CodeGen/X86/fshl.ll
+++ b/llvm/test/CodeGen/X86/fshl.ll
@@ -335,84 +335,83 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: andl $-16, %esp
; X86-SLOW-NEXT: subl $32, %esp
-; X86-SLOW-NEXT: movl 24(%ebp), %esi
+; X86-SLOW-NEXT: movl 24(%ebp), %edi
; X86-SLOW-NEXT: movl 28(%ebp), %eax
; X86-SLOW-NEXT: movl 48(%ebp), %edx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: testb $64, %cl
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %ebx
; X86-SLOW-NEXT: jne .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, %edx
-; X86-SLOW-NEXT: movl 32(%ebp), %esi
-; X86-SLOW-NEXT: movl %edi, %ecx
-; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edi, %edx
+; X86-SLOW-NEXT: movl 32(%ebp), %edi
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, %ebx
; X86-SLOW-NEXT: movl 36(%ebp), %eax
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
; X86-SLOW-NEXT: movl 40(%ebp), %ecx
; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl 44(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ebx
-; X86-SLOW-NEXT: testb $32, %bl
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: testb $32, %cl
; X86-SLOW-NEXT: jne .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
-; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ecx, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: shll %cl, %eax
+; X86-SLOW-NEXT: shrl %esi
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ebx, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: movl %ebx, %edi
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %edi
-; X86-SLOW-NEXT: movl %ecx, %ebx
-; X86-SLOW-NEXT: notb %bl
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %edx
-; X86-SLOW-NEXT: movl %ebx, %ecx
-; X86-SLOW-NEXT: shrl %cl, %edx
-; X86-SLOW-NEXT: orl %eax, %edx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: movl %ebx, %eax
+; X86-SLOW-NEXT: shrl %ebx
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl %esi
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: orl %eax, %esi
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SLOW-NEXT: shll %cl, %eax
-; X86-SLOW-NEXT: shrl %ebx
-; X86-SLOW-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 8(%ebp), %eax
-; X86-SLOW-NEXT: movl %ebx, 12(%eax)
-; X86-SLOW-NEXT: movl %esi, 8(%eax)
-; X86-SLOW-NEXT: movl %edx, 4(%eax)
-; X86-SLOW-NEXT: movl %edi, (%eax)
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %ebx, 8(%eax)
+; X86-SLOW-NEXT: movl %edi, 4(%eax)
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll
index 544ab7f..c307833 100644
--- a/llvm/test/CodeGen/X86/fshr.ll
+++ b/llvm/test/CodeGen/X86/fshr.ll
@@ -322,79 +322,79 @@ define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
; X86-SLOW-NEXT: subl $16, %esp
; X86-SLOW-NEXT: movl 24(%ebp), %edx
; X86-SLOW-NEXT: movl 28(%ebp), %esi
-; X86-SLOW-NEXT: movl 48(%ebp), %ebx
+; X86-SLOW-NEXT: movl 48(%ebp), %edi
; X86-SLOW-NEXT: movl 56(%ebp), %eax
; X86-SLOW-NEXT: testb $64, %al
-; X86-SLOW-NEXT: movl 52(%ebp), %edi
+; X86-SLOW-NEXT: movl 52(%ebp), %eax
; X86-SLOW-NEXT: je .LBB6_1
; X86-SLOW-NEXT: # %bb.2:
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl %edx, %ebx
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl %edx, %edi
; X86-SLOW-NEXT: movl 32(%ebp), %edx
-; X86-SLOW-NEXT: movl %edi, %eax
-; X86-SLOW-NEXT: movl %esi, %edi
+; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %esi, %eax
; X86-SLOW-NEXT: movl 36(%ebp), %esi
; X86-SLOW-NEXT: jmp .LBB6_3
; X86-SLOW-NEXT: .LBB6_1:
-; X86-SLOW-NEXT: movl 40(%ebp), %eax
-; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-SLOW-NEXT: movl 44(%ebp), %eax
+; X86-SLOW-NEXT: movl 40(%ebp), %ecx
+; X86-SLOW-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X86-SLOW-NEXT: movl 44(%ebp), %ecx
; X86-SLOW-NEXT: .LBB6_3:
-; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: testb $32, %cl
+; X86-SLOW-NEXT: movl 56(%ebp), %ebx
+; X86-SLOW-NEXT: testb $32, %bl
; X86-SLOW-NEXT: je .LBB6_4
; X86-SLOW-NEXT: # %bb.5:
-; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl %ecx, %ebx
; X86-SLOW-NEXT: jmp .LBB6_6
; X86-SLOW-NEXT: .LBB6_4:
; X86-SLOW-NEXT: movl %edx, %esi
+; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SLOW-NEXT: movl %eax, %ebx
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, %edi
+; X86-SLOW-NEXT: movl (%esp), %ebx # 4-byte Reload
; X86-SLOW-NEXT: .LBB6_6:
-; X86-SLOW-NEXT: shrl %cl, %eax
-; X86-SLOW-NEXT: movl %eax, %edx
-; X86-SLOW-NEXT: movl %ecx, %eax
-; X86-SLOW-NEXT: notb %al
-; X86-SLOW-NEXT: movl %ebx, %edi
-; X86-SLOW-NEXT: addl %ebx, %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl %edx, %ebx
-; X86-SLOW-NEXT: movl %ebx, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
-; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %edi
-; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-SLOW-NEXT: leal (%ebx,%ebx), %edx
-; X86-SLOW-NEXT: movl %eax, %ecx
-; X86-SLOW-NEXT: shll %cl, %edx
-; X86-SLOW-NEXT: orl %edi, %edx
+; X86-SLOW-NEXT: shrl %cl, %ebx
+; X86-SLOW-NEXT: movl %ecx, %edx
+; X86-SLOW-NEXT: notb %dl
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: addl %edi, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl %ebx, %edi
+; X86-SLOW-NEXT: movl %edi, (%esp) # 4-byte Spill
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-SLOW-NEXT: shrl %cl, %ebx
-; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-SLOW-NEXT: leal (%edi,%edi), %ebx
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %ebx
-; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: movl 56(%ebp), %ecx
; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-SLOW-NEXT: leal (%eax,%eax), %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-SLOW-NEXT: movl 56(%ebp), %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: addl %esi, %esi
-; X86-SLOW-NEXT: movl %eax, %ecx
+; X86-SLOW-NEXT: movl %edx, %ecx
; X86-SLOW-NEXT: shll %cl, %esi
-; X86-SLOW-NEXT: orl %edi, %esi
-; X86-SLOW-NEXT: movl 8(%ebp), %ecx
-; X86-SLOW-NEXT: movl %esi, 12(%ecx)
-; X86-SLOW-NEXT: movl %ebx, 8(%ecx)
-; X86-SLOW-NEXT: movl %edx, 4(%ecx)
-; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
-; X86-SLOW-NEXT: movl %eax, (%ecx)
-; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: orl %eax, %esi
+; X86-SLOW-NEXT: movl 8(%ebp), %eax
+; X86-SLOW-NEXT: movl %esi, 12(%eax)
+; X86-SLOW-NEXT: movl %edi, 8(%eax)
+; X86-SLOW-NEXT: movl %ebx, 4(%eax)
+; X86-SLOW-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X86-SLOW-NEXT: movl %ecx, (%eax)
; X86-SLOW-NEXT: leal -12(%ebp), %esp
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi