diff options
author | Austin Kerbow <Austin.Kerbow@amd.com> | 2020-05-11 17:24:03 -0700 |
---|---|---|
committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2020-05-11 18:10:16 -0700 |
commit | 1429e4c3992d2a5f90de5879773b6cf5a050cb5e (patch) | |
tree | bbeb3531fa0a7ca730959676ab81e5543e7bdfbd /llvm/lib | |
parent | 91259bf9c68ef72cbc0ce9230665808d10dec416 (diff) | |
download | llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.zip llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.gz llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.bz2 |
[AMDGPU][GlobalISel] Revise handling of wide loads in RegBankSelect
When splitting loads in RegBankSelect G_EXTRACT_VECTOR_ELT were being added
which could not be selected. Since invoking the legalizer will generate
instructions that split and combine wide loads, we can remove the redundant
repair instructions which are added by RegBankSelect.
Differential Revision: https://reviews.llvm.org/D75547
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def | 71 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 55 |
2 files changed, 5 insertions, 121 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 4c0632e..600b351 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -220,77 +220,6 @@ const RegisterBankInfo::ValueMapping *getValueMappingSplit64(unsigned BankID, return &ValMappingsSGPR64OnlyVGPR32[1]; } -const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] { - /* 256-bit load */ {0, 256, SGPRRegBank}, - /* 512-bit load */ {0, 512, SGPRRegBank}, - /* 8 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank}, - {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank}, - {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank}, - {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank}, - /* 16 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank}, - {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank}, - {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank}, - {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank}, - {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank}, - {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank}, - {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank}, - {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank}, - /* 4 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank}, - {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank}, - /* 8 64-bit loads */ {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank}, - {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank}, - {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank}, - {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank}, - - /* FIXME: The generic register bank select does not support complex - * break downs where the number of vector elements does not equal the - * number of breakdowns. - * FIXME: register bank select now tries to handle complex break downs, - * but it emits an illegal instruction: - * %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128) - */ - /* 2 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank}, - /* 4 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank}, - {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank} -}; - -const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] { - /* 256-bit load */ {&LoadSGPROnlyBreakDown[0], 1}, - /* 512-bit load */ {&LoadSGPROnlyBreakDown[1], 1}, - /* <8 x i32> load */ {&LoadSGPROnlyBreakDown[2], 8}, - /* <16 x i32> load */ {&LoadSGPROnlyBreakDown[10], 16}, - /* <4 x i64> load */ {&LoadSGPROnlyBreakDown[26], 4}, - /* <8 x i64> load */ {&LoadSGPROnlyBreakDown[30], 8} -}; - -const RegisterBankInfo::ValueMapping * -getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) { - unsigned Size = SizeTy.getSizeInBits(); - if (Size < 256 || BankID == AMDGPU::SGPRRegBankID) - return getValueMapping(BankID, Size); - - assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID); - - // Default to using the non-split ValueMappings, we will use these if - // the register bank is SGPR or if we don't know how to handle the vector - // type. - unsigned Idx = Size == 256 ? 0 : 1; - - // We need to split this load if it has a vgpr pointer. - if (BankID == AMDGPU::VGPRRegBankID) { - if (SizeTy == LLT::vector(8, 32)) - Idx = 2; - else if (SizeTy == LLT::vector(16, 32)) - Idx = 3; - else if (SizeTy == LLT::vector(4, 64)) - Idx = 4; - else if (SizeTy == LLT::vector(8, 64)) - Idx = 5; - } - - return &ValMappingsLoadSGPROnly[Idx]; -} - } // End AMDGPU namespace. } // End llvm namespace. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 64bb54c..0a920c1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -541,7 +541,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( LLT PtrTy = MRI.getType(MI.getOperand(1).getReg()); unsigned PtrSize = PtrTy.getSizeInBits(); unsigned AS = PtrTy.getAddressSpace(); - LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && AS != AMDGPUAS::PRIVATE_ADDRESS) && @@ -555,9 +554,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( } const InstructionMapping &VVMapping = getInstructionMapping( - 2, 1, getOperandsMapping( - {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy), - AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}), + 2, 1, + getOperandsMapping( + {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size), + AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}), 2); // Num Operands AltMappings.push_back(&VVMapping); @@ -1102,23 +1102,6 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane( MI.getOperand(OpIdx).setReg(SGPR); } -// When regbankselect repairs registers, it will insert a repair instruction -// which defines the repaired register. Then it calls applyMapping and expects -// that the targets will either delete or rewrite the originally wrote to the -// repaired registers. Beccause of this, we end up in a situation where -// we have 2 instructions defining the same registers. -static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI, - Register Reg, - const MachineInstr &MI) { - // Is there some way we can assert that there are exactly 2 def instructions? - for (MachineInstr &Other : MRI.def_instructions(Reg)) { - if (&Other != &MI) - return &Other; - } - - return nullptr; -} - bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper, MachineRegisterInfo &MRI) const { @@ -1144,11 +1127,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, assert(LoadSize % MaxNonSmrdLoadSize == 0); - // We want to get the repair instruction now, because it will help us - // determine which instruction the legalizer inserts that will also - // write to DstReg. - MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI); - // RegBankSelect only emits scalar types, so we need to reset the pointer // operand to a pointer type. Register BasePtrReg = SrcRegs[0]; @@ -1167,26 +1145,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI, if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized) return false; - // At this point, the legalizer has split the original load into smaller - // loads. At the end of lowering, it inserts an instruction (LegalizedInst) - // that combines the outputs of the lower loads and writes it to DstReg. - // The register bank selector has also added the RepairInst which writes to - // DstReg as well. - - MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst); - - // Replace the output of the LegalizedInst with a temporary register, since - // RepairInst already defines DstReg. - Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg)); - LegalizedInst->getOperand(0).setReg(TmpReg); - B.setInsertPt(*RepairInst->getParent(), RepairInst); - - for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) { - Register IdxReg = B.buildConstant(LLT::scalar(32), DefIdx).getReg(0); - MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank); - B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg); - } - MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank); return true; } @@ -2972,7 +2930,6 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); SmallVector<const ValueMapping*, 2> OpdsMapping(2); unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI); - LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); Register PtrReg = MI.getOperand(1).getReg(); LLT PtrTy = MRI.getType(PtrReg); unsigned AS = PtrTy.getAddressSpace(); @@ -2998,11 +2955,9 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID; PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize); - ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, - LoadTy); } } else { - ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy); + ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize); } |