-rw-r--r--  llvm/lib/Target/AArch64/AArch64FrameLowering.cpp                                           | 11
-rw-r--r--  llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll | 45
2 files changed, 50 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7e041b0..fde07d8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2757,7 +2757,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
     bool FPOffsetFits = !ForSimm || FPOffset >= -256;
     PreferFP |= Offset > -FPOffset && !SVEStackSize;
 
-    if (MFI.hasVarSizedObjects()) {
+    if (FPOffset >= 0) {
+      // If the FPOffset is positive, that'll always be best, as the SP/BP
+      // will be even further away.
+      UseFP = true;
+    } else if (MFI.hasVarSizedObjects()) {
       // If we have variable sized objects, we can use either FP or BP, as the
       // SP offset is unknown. We can use the base pointer if we have one and
       // FP is not preferred. If not, we're stuck with using FP.
@@ -2769,11 +2773,6 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
       // else we can use BP and FP, but the offset from FP won't fit.
       // That will make us scavenge registers which we can probably avoid by
       // using BP. If it won't fit for BP either, we'll scavenge anyway.
-    } else if (FPOffset >= 0) {
-      // Use SP or FP, whichever gives us the best chance of the offset
-      // being in range for direct access. If the FPOffset is positive,
-      // that'll always be best, as the SP will be even further away.
-      UseFP = true;
     } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
       // Funclets access the locals contained in the parent's stack frame
       // via the frame pointer, so we have to use the FP in the parent
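
Note: the two hunks above hoist the FPOffset >= 0 case to the top of the
base-register selection in resolveFrameOffsetReference, so a non-negative FP
offset now wins before the variable-sized-object and EH-funclet checks are
consulted. A minimal, self-contained C++ sketch of the resulting decision
order follows; FrameQuery, preferFramePointer, and their fields are
hypothetical stand-ins for illustration, not LLVM's actual API.

#include <cstdint>

// Condensed, post-patch selection order of
// AArch64FrameLowering::resolveFrameOffsetReference (sketch only).
struct FrameQuery {
  int64_t FPOffset;        // slot offset relative to the frame pointer
  bool HasVarSizedObjects; // a dynamic alloca makes SP offsets unknown
  bool HasBasePointer;     // a base pointer (BP) is available
  bool HasEHFunclets;      // funclets reach parent locals via FP
  bool FPOffsetFits;       // FPOffset is in range for direct access
  bool PreferFP;           // earlier heuristics already favour FP
};

// Returns true when the frame pointer should be the base register.
bool preferFramePointer(const FrameQuery &Q) {
  if (Q.FPOffset >= 0)
    return true;              // FP is closest; SP/BP are even further away
  if (Q.HasVarSizedObjects) {
    if (Q.FPOffsetFits && Q.HasBasePointer)
      return Q.PreferFP;      // both are OK; pick the preferred one
    return !Q.HasBasePointer; // can't use BP, forced to use FP
  }
  if (Q.HasEHFunclets && !Q.HasBasePointer)
    return true;              // parent-frame locals go through FP
  return false;               // otherwise SP/BP remains the default
}

Previously the FPOffset >= 0 case was only reached when the frame had no
variable-sized objects, so a frame with a dynamic alloca could be steered to
BP (or into register scavenging) even when the slot sat at a small positive
offset from FP.
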
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
index ac19bd5..803bb9f 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll
@@ -45,6 +45,51 @@ define void @test_no_stackslot_scavenging(float %f) #0 {
   ret void
 }
 
+define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-pointer"="all" {
+; CHECK-LABEL: test_no_stackslot_scavenging_with_fp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp d15, d14, [sp, #-128]! // 16-byte Folded Spill
+; CHECK-NEXT: cntd x9
+; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #64
+; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x25, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x19, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: lsl x9, x0, #3
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: str s0, [x29, #28] // 4-byte Folded Spill
+; CHECK-NEXT: add x9, x9, #15
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT: sub x8, x8, x9
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: smstop sm
+; CHECK-NEXT: ldr s0, [x29, #28] // 4-byte Folded Reload
+; CHECK-NEXT: bl use_f
+; CHECK-NEXT: smstart sm
+; CHECK-NEXT: sub sp, x29, #64
+; CHECK-NEXT: ldp x24, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x25, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #128 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+  %ptr2 = alloca i64, i64 %n, align 8
+  %ptr = alloca <vscale x 16 x i8>
+  call void asm sideeffect "", "~{x24},~{x25}"() nounwind
+  call void @use_f(float %f)
+  ret void
+}
+
 declare void @use_f(float)
+declare void @use_f_and_ptr(float, ptr)
 
 attributes #0 = { nounwind "target-features"="+sve,+sme" "aarch64_pstate_sm_enabled" }
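
Note on the new test: %ptr2 is a variable-sized alloca (sized by %n), so SP
offsets are unknown, and "frame-pointer"="all" pins x29 as a frame pointer.
As the CHECK lines show, the float argument is spilled and reloaded around
the streaming-mode change (smstop sm / bl use_f / smstart sm) at a positive
offset from the frame pointer (str/ldr s0, [x29, #28]) rather than through a
scavenged SP- or BP-based address, which is exactly what the hoisted
FPOffset >= 0 case buys.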