[AMDGPU][GlobalISel] Revise handling of wide loads in RegBankSelect

When splitting loads in RegBankSelect G_EXTRACT_VECTOR_ELT were being added which could not be selected. Since invoking the legalizer will generate instructions that split and combine wide loads, we can remove the redundant repair instructions which are added by RegBankSelect. Differential Revision: https://reviews.llvm.org/D75547
author: Austin Kerbow <Austin.Kerbow@amd.com> 2020-05-11 17:24:03 -0700
committer: Austin Kerbow <Austin.Kerbow@amd.com> 2020-05-11 18:10:16 -0700
commit: 1429e4c3992d2a5f90de5879773b6cf5a050cb5e (patch)
tree: bbeb3531fa0a7ca730959676ab81e5543e7bdfbd /llvm/lib
parent: 91259bf9c68ef72cbc0ce9230665808d10dec416 (diff)
download: llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.zip
llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.gz
llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.bz2
2 files changed, 5 insertions, 121 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
index 4c0632e..600b351 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def
@@ -220,77 +220,6 @@ const RegisterBankInfo::ValueMapping *getValueMappingSplit64(unsigned BankID,
   return &ValMappingsSGPR64OnlyVGPR32[1];
 }
 
-const RegisterBankInfo::PartialMapping LoadSGPROnlyBreakDown[] {
-  /* 256-bit load */    {0, 256, SGPRRegBank},
-  /* 512-bit load */    {0, 512, SGPRRegBank},
-  /* 8 32-bit loads */  {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
-                        {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
-                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
-                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
-  /* 16 32-bit loads */ {0, 32, VGPRRegBank}, {32, 32, VGPRRegBank},
-                        {64, 32, VGPRRegBank}, {96, 32, VGPRRegBank},
-                        {128, 32, VGPRRegBank}, {160, 32, VGPRRegBank},
-                        {192, 32, VGPRRegBank}, {224, 32, VGPRRegBank},
-                        {256, 32, VGPRRegBank}, {288, 32, VGPRRegBank},
-                        {320, 32, VGPRRegBank}, {352, 32, VGPRRegBank},
-                        {384, 32, VGPRRegBank}, {416, 32, VGPRRegBank},
-                        {448, 32, VGPRRegBank}, {480, 32, VGPRRegBank},
-  /* 4 64-bit loads */  {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
-                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
-  /* 8 64-bit loads */  {0, 64, VGPRRegBank}, {64, 64, VGPRRegBank},
-                        {128, 64, VGPRRegBank}, {192, 64, VGPRRegBank},
-                        {256, 64, VGPRRegBank}, {320, 64, VGPRRegBank},
-                        {384, 64, VGPRRegBank}, {448, 64, VGPRRegBank},
-
-  /* FIXME: The generic register bank select does not support complex
-   * break downs where the number of vector elements does not equal the
-   * number of breakdowns.
-   * FIXME: register bank select now tries to handle complex break downs,
-   * but it emits an illegal instruction:
-   * %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS %2:vgpr(s128), %3:vgpr(s128)
-   */
-  /* 2 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
-  /* 4 128-bit loads */ {0, 128, VGPRRegBank}, {128, 128, VGPRRegBank},
-                        {256, 128, VGPRRegBank}, {384, 128, VGPRRegBank}
-};
-
-const RegisterBankInfo::ValueMapping ValMappingsLoadSGPROnly[] {
-  /* 256-bit load */     {&LoadSGPROnlyBreakDown[0], 1},
-  /* 512-bit load */     {&LoadSGPROnlyBreakDown[1], 1},
-  /* <8 x i32> load  */  {&LoadSGPROnlyBreakDown[2], 8},
-  /* <16 x i32> load */  {&LoadSGPROnlyBreakDown[10], 16},
-  /* <4 x i64> load */   {&LoadSGPROnlyBreakDown[26], 4},
-  /* <8 x i64> load */   {&LoadSGPROnlyBreakDown[30], 8}
-};
-
-const RegisterBankInfo::ValueMapping *
-getValueMappingLoadSGPROnly(unsigned BankID, LLT SizeTy) {
-  unsigned Size = SizeTy.getSizeInBits();
-  if (Size < 256 || BankID == AMDGPU::SGPRRegBankID)
-    return getValueMapping(BankID, Size);
-
-  assert((Size == 256 || Size == 512) && BankID == AMDGPU::VGPRRegBankID);
-
-  // Default to using the non-split ValueMappings, we will use these if
-  // the register bank is SGPR or if we don't know how to handle the vector
-  // type.
-  unsigned Idx = Size == 256 ? 0 : 1;
-
-  // We need to split this load if it has a vgpr pointer.
-  if (BankID == AMDGPU::VGPRRegBankID) {
-    if (SizeTy == LLT::vector(8, 32))
-      Idx = 2;
-    else if (SizeTy == LLT::vector(16, 32))
-      Idx = 3;
-    else if (SizeTy == LLT::vector(4, 64))
-      Idx = 4;
-    else if (SizeTy == LLT::vector(8, 64))
-      Idx = 5;
-  }
-
-  return &ValMappingsLoadSGPROnly[Idx];
-}
-
 
 } // End AMDGPU namespace.
 } // End llvm namespace.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 64bb54c..0a920c1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -541,7 +541,6 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     LLT PtrTy = MRI.getType(MI.getOperand(1).getReg());
     unsigned PtrSize = PtrTy.getSizeInBits();
     unsigned AS = PtrTy.getAddressSpace();
-    LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
 
     if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS &&
          AS != AMDGPUAS::PRIVATE_ADDRESS) &&
@@ -555,9 +554,10 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings(
     }
 
     const InstructionMapping &VVMapping = getInstructionMapping(
-        2, 1, getOperandsMapping(
-          {AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy),
-           AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
+        2, 1,
+        getOperandsMapping(
+            {AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size),
+             AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize)}),
         2); // Num Operands
     AltMappings.push_back(&VVMapping);
 
@@ -1102,23 +1102,6 @@ void AMDGPURegisterBankInfo::constrainOpWithReadfirstlane(
   MI.getOperand(OpIdx).setReg(SGPR);
 }
 
-// When regbankselect repairs registers, it will insert a repair instruction
-// which defines the repaired register.  Then it calls applyMapping and expects
-// that the targets will either delete or rewrite the originally wrote to the
-// repaired registers.  Beccause of this, we end up in a situation where
-// we have 2 instructions defining the same registers.
-static MachineInstr *getOtherVRegDef(const MachineRegisterInfo &MRI,
-                                     Register Reg,
-                                     const MachineInstr &MI) {
-  // Is there some way we can assert that there are exactly 2 def instructions?
-  for (MachineInstr &Other : MRI.def_instructions(Reg)) {
-    if (&Other != &MI)
-      return &Other;
-  }
-
-  return nullptr;
-}
-
 bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
                         const AMDGPURegisterBankInfo::OperandsMapper &OpdMapper,
                                               MachineRegisterInfo &MRI) const {
@@ -1144,11 +1127,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
 
   assert(LoadSize % MaxNonSmrdLoadSize == 0);
 
-  // We want to get the repair instruction now, because it will help us
-  // determine which instruction the legalizer inserts that will also
-  // write to DstReg.
-  MachineInstr *RepairInst = getOtherVRegDef(MRI, DstReg, MI);
-
   // RegBankSelect only emits scalar types, so we need to reset the pointer
   // operand to a pointer type.
   Register BasePtrReg = SrcRegs[0];
@@ -1167,26 +1145,6 @@ bool AMDGPURegisterBankInfo::applyMappingWideLoad(MachineInstr &MI,
   if (Helper.fewerElementsVector(MI, 0, LoadSplitTy) != LegalizerHelper::Legalized)
     return false;
 
-  // At this point, the legalizer has split the original load into smaller
-  // loads.  At the end of lowering, it inserts an instruction (LegalizedInst)
-  // that combines the outputs of the lower loads and writes it to DstReg.
-  // The register bank selector has also added the RepairInst which writes to
-  // DstReg as well.
-
-  MachineInstr *LegalizedInst = getOtherVRegDef(MRI, DstReg, *RepairInst);
-
-  // Replace the output of the LegalizedInst with a temporary register, since
-  // RepairInst already defines DstReg.
-  Register TmpReg = MRI.createGenericVirtualRegister(MRI.getType(DstReg));
-  LegalizedInst->getOperand(0).setReg(TmpReg);
-  B.setInsertPt(*RepairInst->getParent(), RepairInst);
-
-  for (unsigned DefIdx = 0, e = DefRegs.size(); DefIdx != e; ++DefIdx) {
-    Register IdxReg = B.buildConstant(LLT::scalar(32), DefIdx).getReg(0);
-    MRI.setRegBank(IdxReg, AMDGPU::VGPRRegBank);
-    B.buildExtractVectorElement(DefRegs[DefIdx], TmpReg, IdxReg);
-  }
-
   MRI.setRegBank(DstReg, AMDGPU::VGPRRegBank);
   return true;
 }
@@ -2972,7 +2930,6 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
   const MachineRegisterInfo &MRI = MF.getRegInfo();
   SmallVector<const ValueMapping*, 2> OpdsMapping(2);
   unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, *TRI);
-  LLT LoadTy = MRI.getType(MI.getOperand(0).getReg());
   Register PtrReg = MI.getOperand(1).getReg();
   LLT PtrTy = MRI.getType(PtrReg);
   unsigned AS = PtrTy.getAddressSpace();
@@ -2998,11 +2955,9 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const {
         AMDGPU::VGPRRegBankID : AMDGPU::SGPRRegBankID;
 
       PtrMapping = AMDGPU::getValueMapping(PtrBankID, PtrSize);
-      ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID,
-                                                       LoadTy);
     }
   } else {
-    ValMapping = AMDGPU::getValueMappingLoadSGPROnly(AMDGPU::VGPRRegBankID, LoadTy);
+    ValMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size);
     PtrMapping = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, PtrSize);
   }
author	Austin Kerbow <Austin.Kerbow@amd.com>	2020-05-11 17:24:03 -0700
committer	Austin Kerbow <Austin.Kerbow@amd.com>	2020-05-11 18:10:16 -0700
commit	1429e4c3992d2a5f90de5879773b6cf5a050cb5e (patch)
tree	bbeb3531fa0a7ca730959676ab81e5543e7bdfbd /llvm/lib
parent	91259bf9c68ef72cbc0ce9230665808d10dec416 (diff)
download	llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.zip llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.gz llvm-1429e4c3992d2a5f90de5879773b6cf5a050cb5e.tar.bz2