author     Tom Stellard <thomas.stellard@amd.com>   2015-05-02 04:17:29 +0000
committer  Tom Stellard <thomas.stellard@amd.com>   2015-05-02 04:17:29 +0000
commit     60e45dc30c9e4f4a18febf65c1aa3ea75c2e4eef (patch)
tree       28620ebd834327c35cc6f2531444274e433701d5
parent     64a3336c2054e47780607c9bae9b7184cfe02f60 (diff)
Merging r227519:
------------------------------------------------------------------------
r227519 | reid | 2015-01-29 18:58:04 -0500 (Thu, 29 Jan 2015) | 10 lines
x86: Fix large model calls to __chkstk for dynamic allocas

In the large code model, we now materialize the address of __chkstk in
%r11 before calling it, since a direct 32-bit pc-relative call cannot be
assumed to reach the probe function. Refactor the code so that the probe
call is emitted in only one place. Simplify things by using __chkstk_ms
instead of __chkstk on cygming; we already use that symbol in the prolog
emission, and reusing it simplifies our logic.

Second half of PR18582.
------------------------------------------------------------------------
llvm-svn: 236378
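The heart of the change is the call-through-register sequence required by the
large code model, where the probe function may sit more than 2 GiB away from
the call site and a 32-bit pc-relative call may not reach it. Below is a
minimal sketch condensing the 64-bit paths of the new emitStackProbeCall from
the diff that follows; the helper name emitProbeSketch is invented for
illustration, and the sketch assumes it is built inside lib/Target/X86 so the
X86-internal opcode and register enums are visible.

// emitProbeSketch is a hypothetical, condensed rendition of the new
// X86FrameLowering::emitStackProbeCall below (64-bit paths only).
#include "X86InstrInfo.h" // X86-internal; provides X86:: opcodes and registers
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"

using namespace llvm;

static void emitProbeSketch(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, DebugLoc DL,
                            const TargetInstrInfo &TII, bool IsLargeCodeModel,
                            const char *Symbol) {
  MachineInstrBuilder CI;
  if (IsLargeCodeModel) {
    // Large code model: materialize the probe function's address in R11
    // (scratch in all supported calling conventions), then call indirectly.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(Symbol);
    CI = BuildMI(MBB, MBBI, DL, TII.get(X86::CALL64r)).addReg(X86::R11);
  } else {
    // Small/medium code models: a direct pc-relative call is sufficient.
    CI = BuildMI(MBB, MBBI, DL, TII.get(X86::CALL64pcrel32))
             .addExternalSymbol(Symbol);
  }
  // The probe takes the allocation size in RAX, preserves RSP, and clobbers
  // EFLAGS; model that with implicit operands.
  CI.addReg(X86::RAX, RegState::Implicit)
      .addReg(X86::RSP, RegState::Implicit)
      .addReg(X86::RAX, RegState::Define | RegState::Implicit)
      .addReg(X86::RSP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
  // Neither MSVC's __chkstk nor cygming's ___chkstk_ms moves RSP itself,
  // so perform the actual stack adjustment here.
  BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(X86::RAX);
}

For the large model this produces exactly the sequence the new L64 test checks
expect: movabsq $__chkstk, %r11; callq *%r11; subq %rax, %rsp.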
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp        | 97
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.h          |  8
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         | 43
-rw-r--r--  llvm/test/CodeGen/X86/inalloca-stdcall.ll       |  1
-rw-r--r--  llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll | 22
5 files changed, 84 insertions(+), 87 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 2c110b1..41e717f 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -375,18 +375,25 @@ static bool usesTheStack(const MachineFunction &MF) {
   return false;
 }
 
-void X86FrameLowering::getStackProbeFunction(const MachineFunction &MF,
-                                             const X86Subtarget &STI,
-                                             unsigned &CallOp,
-                                             const char *&Symbol) {
-  if (STI.is64Bit())
-    CallOp = MF.getTarget().getCodeModel() == CodeModel::Large
-                 ? X86::CALL64r
-                 : X86::W64ALLOCA;
+void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
+                                          MachineBasicBlock &MBB,
+                                          MachineBasicBlock::iterator MBBI,
+                                          DebugLoc DL) {
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  bool Is64Bit = STI.is64Bit();
+  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
+  const X86RegisterInfo *RegInfo =
+      static_cast<const X86RegisterInfo *>(MF.getSubtarget().getRegisterInfo());
+
+  unsigned CallOp;
+  if (Is64Bit)
+    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
   else
     CallOp = X86::CALLpcrel32;
 
-  if (STI.is64Bit()) {
+  const char *Symbol;
+  if (Is64Bit) {
     if (STI.isTargetCygMing()) {
       Symbol = "___chkstk_ms";
     } else {
@@ -396,6 +403,37 @@ void X86FrameLowering::getStackProbeFunction(const MachineFunction &MF,
       Symbol = "_alloca";
     else
       Symbol = "_chkstk";
+
+  MachineInstrBuilder CI;
+
+  // All current stack probes take AX and SP as input, clobber flags, and
+  // preserve all registers. x86_64 probes leave RSP unmodified.
+  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
+    // For the large code model, we have to call through a register. Use R11,
+    // as it is scratch in all supported calling conventions.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
+        .addExternalSymbol(Symbol);
+    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
+  } else {
+    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
+  }
+
+  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
+  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
+  CI.addReg(AX, RegState::Implicit)
+      .addReg(SP, RegState::Implicit)
+      .addReg(AX, RegState::Define | RegState::Implicit)
+      .addReg(SP, RegState::Define | RegState::Implicit)
+      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+  if (Is64Bit) {
+    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
+    // themselves. It also does not clobber %rax so we can reuse it when
+    // adjusting %rsp.
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
+        .addReg(X86::RSP)
+        .addReg(X86::RAX);
+  }
 }
 
 /// emitPrologue - Push callee-saved registers onto the stack, which
@@ -728,11 +766,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   // increments is necessary to ensure that the guard pages used by the OS
   // virtual memory manager are allocated in correct sequence.
   if (NumBytes >= StackProbeSize && UseStackProbe) {
-    const char *StackProbeSymbol;
-    unsigned CallOp;
-
-    getStackProbeFunction(MF, STI, CallOp, StackProbeSymbol);
-
     // Check whether EAX is livein for this function.
     bool isEAXAlive = isEAXLiveIn(MF);
 
@@ -761,33 +794,17 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
         .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
-      // For the large code model, we have to call through a register. Use R11,
-      // as it is unused and clobbered by all probe functions.
-      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
-          .addExternalSymbol(StackProbeSymbol);
-      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
-          .addReg(X86::R11)
-          .addReg(StackPtr, RegState::Define | RegState::Implicit)
-          .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
-          .setMIFlag(MachineInstr::FrameSetup);
-    } else {
-      BuildMI(MBB, MBBI, DL, TII.get(CallOp))
-          .addExternalSymbol(StackProbeSymbol)
-          .addReg(StackPtr, RegState::Define | RegState::Implicit)
-          .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
+    // Save a pointer to the MI where we set AX.
+    MachineBasicBlock::iterator SetRAX = MBBI;
+    --SetRAX;
+
+    // Call __chkstk, __chkstk_ms, or __alloca.
+    emitStackProbeCall(MF, MBB, MBBI, DL);
+
+    // Apply the frame setup flag to all inserted instrs.
+    for (; SetRAX != MBBI; ++SetRAX)
+      SetRAX->setFlag(MachineInstr::FrameSetup);
 
-    if (Is64Bit) {
-      // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
-      // themself. It also does not clobber %rax so we can reuse it when
-      // adjusting %rsp.
-      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
-          .addReg(StackPtr)
-          .addReg(X86::RAX)
-          .setMIFlag(MachineInstr::FrameSetup);
-    }
     if (isEAXAlive) {
       // Restore EAX
       MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 7b1b5e4..dd8fc32 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -27,9 +27,11 @@ public:
   explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO)
       : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {}
 
-  static void getStackProbeFunction(const MachineFunction &MF,
-                                    const X86Subtarget &STI, unsigned &CallOp,
-                                    const char *&Symbol);
+  /// Emit a call to the target's stack probe function. This is required for all
+  /// large stack allocations on Windows. The caller is required to materialize
+  /// the number of bytes to probe in RAX/EAX.
+  static void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MBBI, DebugLoc DL);
 
   void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b758427..85978d8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15,6 +15,7 @@
 #include "X86ISelLowering.h"
 #include "Utils/X86ShuffleDecode.h"
 #include "X86CallingConv.h"
+#include "X86FrameLowering.h"
 #include "X86InstrBuilder.h"
 #include "X86MachineFunctionInfo.h"
 #include "X86TargetMachine.h"
@@ -21091,47 +21092,7 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
 
   assert(!Subtarget->isTargetMachO());
 
-  // The lowering is pretty easy: we're just emitting the call to _alloca. The
-  // non-trivial part is impdef of ESP.
-
-  if (Subtarget->isTargetWin64()) {
-    if (Subtarget->isTargetCygMing()) {
-      // ___chkstk(Mingw64):
-      // Clobbers R10, R11, RAX and EFLAGS.
-      // Updates RSP.
-      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
-          .addExternalSymbol("___chkstk")
-          .addReg(X86::RAX, RegState::Implicit)
-          .addReg(X86::RSP, RegState::Implicit)
-          .addReg(X86::RAX, RegState::Define | RegState::Implicit)
-          .addReg(X86::RSP, RegState::Define | RegState::Implicit)
-          .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-    } else {
-      // __chkstk(MSVCRT): does not update stack pointer.
-      // Clobbers R10, R11 and EFLAGS.
-      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
-          .addExternalSymbol("__chkstk")
-          .addReg(X86::RAX, RegState::Implicit)
-          .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-      // RAX has the offset to be subtracted from RSP.
-      BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
-          .addReg(X86::RSP)
-          .addReg(X86::RAX);
-    }
-  } else {
-    const char *StackProbeSymbol = (Subtarget->isTargetKnownWindowsMSVC() ||
-                                    Subtarget->isTargetWindowsItanium())
-                                       ? "_chkstk"
-                                       : "_alloca";
-
-    BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
-        .addExternalSymbol(StackProbeSymbol)
-        .addReg(X86::EAX, RegState::Implicit)
-        .addReg(X86::ESP, RegState::Implicit)
-        .addReg(X86::EAX, RegState::Define | RegState::Implicit)
-        .addReg(X86::ESP, RegState::Define | RegState::Implicit)
-        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-  }
+  X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
 
   MI->eraseFromParent(); // The pseudo instruction is gone now.
   return BB;
diff --git a/llvm/test/CodeGen/X86/inalloca-stdcall.ll b/llvm/test/CodeGen/X86/inalloca-stdcall.ll
index e5b07e2..65a0f77 100644
--- a/llvm/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/llvm/test/CodeGen/X86/inalloca-stdcall.ll
@@ -6,6 +6,7 @@ declare x86_stdcallcc void @f(%Foo* inalloca %a)
 declare x86_stdcallcc void @i(i32 %a)
 
 define void @g() {
+; CHECK-LABEL: _g:
   %b = alloca inalloca %Foo
 ; CHECK: movl $8, %eax
 ; CHECK: calll __chkstk
diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
index a6b6536..aab2eea 100644
--- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
+; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 -code-model=large | FileCheck %s -check-prefix=L64
 ; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
 ; PR8777
 ; PR8778
@@ -24,6 +25,13 @@ entry:
 ; W64: callq __chkstk
 ; W64: subq %rax, %rsp
 
+; Use %r11 for the large model.
+; L64: movq %rsp, %rbp
+; L64: $4096, %rax
+; L64: movabsq $__chkstk, %r11
+; L64: callq *%r11
+; L64: subq %rax, %rsp
+
 ; Freestanding
 ; EFI: movq %rsp, %rbp
 ; EFI: $[[B0OFS:4096|4104]], %rsp
@@ -33,8 +41,8 @@ entry:
 
 ; M64: leaq 15(%{{.*}}), %rax
 ; M64: andq $-16, %rax
-; M64: callq ___chkstk
-; M64-NOT: %rsp
+; M64: callq ___chkstk_ms
+; M64: subq %rax, %rsp
 ; M64: movq %rsp, %rax
 
 ; W64: leaq 15(%{{.*}}), %rax
@@ -43,6 +51,13 @@ entry:
 ; W64: subq %rax, %rsp
 ; W64: movq %rsp, %rax
 
+; L64: leaq 15(%{{.*}}), %rax
+; L64: andq $-16, %rax
+; L64: movabsq $__chkstk, %r11
+; L64: callq *%r11
+; L64: subq %rax, %rsp
+; L64: movq %rsp, %rax
+
 ; EFI: leaq 15(%{{.*}}), [[R1:%r.*]]
 ; EFI: andq $-16, [[R1]]
 ; EFI: movq %rsp, [[R64:%r.*]]
@@ -84,7 +99,8 @@ entry:
 
 ; M64: leaq 15(%{{.*}}), %rax
 ; M64: andq $-16, %rax
-; M64: callq ___chkstk
+; M64: callq ___chkstk_ms
+; M64: subq %rax, %rsp
 ; M64: movq %rsp, [[R2:%r.*]]
 ; M64: andq $-128, [[R2]]
 ; M64: movq [[R2]], %rsp
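
As a usage note: with the probe emission now a static member of
X86FrameLowering, backend code that lowers a dynamic-alloca pseudo can
delegate to it in one line, as the rewritten EmitLoweredWinAlloca above does.
A hedged sketch of such a caller follows; the function name
lowerWinAllocaSketch is invented for illustration, and it assumes, per the new
doc comment in X86FrameLowering.h, that the number of bytes to probe has
already been materialized in RAX (or EAX on 32-bit targets).

// Hypothetical caller, modeled on the rewritten EmitLoweredWinAlloca above.
// Assumes it is compiled inside lib/Target/X86 and that the allocation size
// is already in RAX/EAX when the pseudo instruction MI is reached.
#include "X86FrameLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DebugLoc.h"

using namespace llvm;

static MachineBasicBlock *lowerWinAllocaSketch(MachineInstr *MI,
                                               MachineBasicBlock *BB,
                                               DebugLoc DL) {
  // One call covers Win64, cygwin/mingw, and 32-bit Windows: the helper picks
  // the probe symbol, the call opcode, and the large-model R11 indirection
  // from the subtarget and code model.
  X86FrameLowering::emitStackProbeCall(*BB->getParent(), *BB, MI, DL);
  MI->eraseFromParent(); // the alloca pseudo is no longer needed
  return BB;
}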