diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 32 |
1 files changed, 29 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 14101e5..3d8d274 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -374,8 +374,10 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
     return true;
   }
 
-  unsigned ReturnOpc =
-      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
+  const bool IsWholeWave = MFI->isWholeWaveFunction();
+  unsigned ReturnOpc = IsWholeWave ? AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN
+                       : IsShader  ? AMDGPU::SI_RETURN_TO_EPILOG
+                                   : AMDGPU::SI_RETURN;
   auto Ret = B.buildInstrNoInsert(ReturnOpc);
 
   if (!FLI.CanLowerReturn)
@@ -383,6 +385,9 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val,
   else if (!lowerReturnVal(B, Val, VRegs, Ret))
     return false;
 
+  if (IsWholeWave)
+    addOriginalExecToReturn(B.getMF(), Ret);
+
   // TODO: Handle CalleeSavedRegsViaCopy.
 
   B.insertInstr(Ret);
@@ -632,6 +637,17 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     if (DL.getTypeStoreSize(Arg.getType()) == 0)
       continue;
 
+    if (Info->isWholeWaveFunction() && Idx == 0) {
+      assert(VRegs[Idx].size() == 1 && "Expected only one register");
+
+      // The first argument for whole wave functions is the original EXEC value.
+      B.buildInstr(AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_SETUP)
+          .addDef(VRegs[Idx][0]);
+
+      ++Idx;
+      continue;
+    }
+
     const bool InReg = Arg.hasAttribute(Attribute::InReg);
 
     if (Arg.hasAttribute(Attribute::SwiftSelf) ||
@@ -1347,6 +1363,7 @@ bool AMDGPUCallLowering::lowerTailCall(
   SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
 
   if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
+      Info.CallConv != CallingConv::AMDGPU_Gfx_WholeWave &&
       !AMDGPU::isChainCC(Info.CallConv)) {
     // With a fixed ABI, allocate fixed registers before user arguments.
     if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
@@ -1524,7 +1541,8 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   // after the ordinary user argument registers.
   SmallVector<std::pair<MCRegister, Register>, 12> ImplicitArgRegs;
 
-  if (Info.CallConv != CallingConv::AMDGPU_Gfx) {
+  if (Info.CallConv != CallingConv::AMDGPU_Gfx &&
+      Info.CallConv != CallingConv::AMDGPU_Gfx_WholeWave) {
     // With a fixed ABI, allocate fixed registers before user arguments.
     if (!passSpecialInputs(MIRBuilder, CCInfo, ImplicitArgRegs, Info))
       return false;
@@ -1592,3 +1610,11 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
 
   return true;
 }
+
+void AMDGPUCallLowering::addOriginalExecToReturn(
+    MachineFunction &MF, MachineInstrBuilder &Ret) const {
+  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  const MachineInstr *Setup = TII->getWholeWaveFunctionSetup(MF);
+  Ret.addReg(Setup->getOperand(0).getReg());
+}