diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-09-15 15:41:53 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2014-09-15 15:41:53 +0000 |
commit | 3f98140c87728cec3247e2dbe0a9990e48afa984 (patch) | |
tree | 221a264a783c12b722ae3e15e68f0798a35ce101 /llvm/lib/Target/R600/SILowerControlFlow.cpp | |
parent | 65f67e4dfe837d8aff8d9b30cecc7247466d6f52 (diff) | |
download | llvm-3f98140c87728cec3247e2dbe0a9990e48afa984.zip llvm-3f98140c87728cec3247e2dbe0a9990e48afa984.tar.gz llvm-3f98140c87728cec3247e2dbe0a9990e48afa984.tar.bz2 |
R600/SI: Add preliminary support for flat address space
llvm-svn: 217777
Diffstat (limited to 'llvm/lib/Target/R600/SILowerControlFlow.cpp')
-rw-r--r-- | llvm/lib/Target/R600/SILowerControlFlow.cpp | 46 |
1 files changed, 45 insertions, 1 deletions
diff --git a/llvm/lib/Target/R600/SILowerControlFlow.cpp b/llvm/lib/Target/R600/SILowerControlFlow.cpp index 97c706b..59270ee 100644 --- a/llvm/lib/Target/R600/SILowerControlFlow.cpp +++ b/llvm/lib/Target/R600/SILowerControlFlow.cpp @@ -52,6 +52,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -451,6 +452,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { bool HaveKill = false; bool NeedM0 = false; bool NeedWQM = false; + bool NeedFlat = false; unsigned Depth = 0; for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); @@ -467,6 +469,12 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { NeedWQM = true; } + // Flat uses m0 in case it needs to access LDS. + if (TII->isFLAT(MI.getOpcode())) { + NeedM0 = true; + NeedFlat = true; + } + switch (MI.getOpcode()) { default: break; case AMDGPU::SI_IF: @@ -532,7 +540,6 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { case AMDGPU::V_INTERP_MOV_F32: NeedWQM = true; break; - } } } @@ -550,5 +557,42 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { AMDGPU::EXEC).addReg(AMDGPU::EXEC); } + // FIXME: This seems inappropriate to do here. + if (NeedFlat && MFI->IsKernel) { + // Insert the prologue initializing the SGPRs pointing to the scratch space + // for flat accesses. + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); + + // TODO: What to use with function calls? + + // FIXME: This is reporting stack size that is used in a scratch buffer + // rather than registers as well. + uint64_t StackSizeBytes = FrameInfo->getStackSize(); + + int IndirectBegin + = static_cast<const AMDGPUInstrInfo*>(TII)->getIndirectIndexBegin(MF); + // Convert register index to 256-byte unit. + uint64_t StackOffset = IndirectBegin < 0 ? 0 : (4 * IndirectBegin / 256); + + assert((StackSizeBytes < 0xffff) && StackOffset < 0xffff && + "Stack limits should be smaller than 16-bits"); + + // Initialize the flat scratch register pair. + // TODO: Can we use one s_mov_b64 here? + + // Offset is in units of 256-bytes. + MachineBasicBlock &MBB = MF.front(); + DebugLoc NoDL; + MachineBasicBlock::iterator Start = MBB.getFirstNonPHI(); + const MCInstrDesc &SMovK = TII->get(AMDGPU::S_MOVK_I32); + + BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_LO) + .addImm(StackOffset); + + // Documentation says size is "per-thread scratch size in bytes" + BuildMI(MBB, Start, NoDL, SMovK, AMDGPU::FLAT_SCR_HI) + .addImm(StackSizeBytes); + } + return true; } |