diff options
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll | 45 |
2 files changed, 50 insertions, 6 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 7e041b0..fde07d8 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2757,7 +2757,11 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( bool FPOffsetFits = !ForSimm || FPOffset >= -256; PreferFP |= Offset > -FPOffset && !SVEStackSize; - if (MFI.hasVarSizedObjects()) { + if (FPOffset >= 0) { + // If the FPOffset is positive, that'll always be best, as the SP/BP + // will be even further away. + UseFP = true; + } else if (MFI.hasVarSizedObjects()) { // If we have variable sized objects, we can use either FP or BP, as the // SP offset is unknown. We can use the base pointer if we have one and // FP is not preferred. If not, we're stuck with using FP. @@ -2769,11 +2773,6 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference( // else we can use BP and FP, but the offset from FP won't fit. // That will make us scavenge registers which we can probably avoid by // using BP. If it won't fit for BP either, we'll scavenge anyway. - } else if (FPOffset >= 0) { - // Use SP or FP, whichever gives us the best chance of the offset - // being in range for direct access. If the FPOffset is positive, - // that'll always be best, as the SP will be even further away. - UseFP = true; } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) { // Funclets access the locals contained in the parent's stack frame // via the frame pointer, so we have to use the FP in the parent diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll index ac19bd5..803bb9f 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -45,6 +45,51 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ret void } +define void @test_no_stackslot_scavenging_with_fp(float %f, i64 %n) #0 "frame-pointer"="all" { +; CHECK-LABEL: test_no_stackslot_scavenging_with_fp: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-128]! // 16-byte Folded Spill +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: str x9, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp x28, x25, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: lsl x9, x0, #3 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: str s0, [x29, #28] // 4-byte Folded Spill +; CHECK-NEXT: add x9, x9, #15 +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldr s0, [x29, #28] // 4-byte Folded Reload +; CHECK-NEXT: bl use_f +; CHECK-NEXT: smstart sm +; CHECK-NEXT: sub sp, x29, #64 +; CHECK-NEXT: ldp x24, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x25, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #128 // 16-byte Folded Reload +; CHECK-NEXT: ret + %ptr2 = alloca i64, i64 %n, align 8 + %ptr = alloca <vscale x 16 x i8> + call void asm sideeffect "", "~{x24},~{x25}"() nounwind + call void @use_f(float %f) + ret void +} + declare void @use_f(float) +declare void @use_f_and_ptr(float, ptr) attributes #0 = { nounwind "target-features"="+sve,+sme" "aarch64_pstate_sm_enabled" } |
