Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPU.h                           |  6
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h          |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp               |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h           |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp     |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp  | 90
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp          |  2
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSchedStrategy.h                 |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIFrameLowering.cpp                |  4
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                    |  6
-rw-r--r--  llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp              |  2
11 files changed, 74 insertions, 50 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index cd8b249..67042b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -69,7 +69,7 @@ FunctionPass *createAMDGPUPreloadKernArgPrologLegacyPass();
 ModulePass *createAMDGPUPreloadKernelArgumentsLegacyPass(const TargetMachine *);
 
 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
-  AMDGPUSimplifyLibCallsPass() {}
+  AMDGPUSimplifyLibCallsPass() = default;
 
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
@@ -371,13 +371,13 @@ public:
 class AMDGPUAnnotateUniformValuesPass
     : public PassInfoMixin<AMDGPUAnnotateUniformValuesPass> {
 public:
-  AMDGPUAnnotateUniformValuesPass() {}
+  AMDGPUAnnotateUniformValuesPass() = default;
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 
 class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
 public:
-  SIModeRegisterPass() {}
+  SIModeRegisterPass() = default;
   PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM);
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
index 1064e57..dad94b8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUArgumentUsageInfo.h
@@ -96,7 +96,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
 }
 
 struct KernArgPreloadDescriptor : public ArgDescriptor {
-  KernArgPreloadDescriptor() {}
+  KernArgPreloadDescriptor() = default;
 
   SmallVector<MCRegister> Regs;
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 9907c88f..8669978 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1555,7 +1555,7 @@ private:
 
   AMDGPU::ClusterDimsAttr Attr;
 
-  static constexpr const char AttrName[] = "amdgpu-cluster-dims";
+  static constexpr char AttrName[] = "amdgpu-cluster-dims";
 };
 
 AAAMDGPUClusterDims &
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
index cf2ab825..a3be0f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.h
@@ -48,7 +48,7 @@ private:
   FuncInfoMap FIM;
 
 public:
-  AMDGPUPerfHintAnalysis() {}
+  AMDGPUPerfHintAnalysis() = default;
 
   // OldPM
   bool runOnSCC(const GCNTargetMachine &TM, CallGraphSCC &SCC);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 103cdec..1e5885a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -202,7 +202,7 @@ bool PredicateMapping::match(const MachineInstr &MI,
   return true;
 }
 
-SetOfRulesForOpcode::SetOfRulesForOpcode() {}
+SetOfRulesForOpcode::SetOfRulesForOpcode() = default;
 
 SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
     : FastTypes(FastTypes) {}
@@ -913,6 +913,8 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});
 
+  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});
+
   addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
       .Uni(S64, {{Sgpr64}, {}});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index 733c5d5..fe81a5e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -181,14 +181,52 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
   return NewRetBlock;
 }
 
+static BasicBlock *
+createDummyReturnBlock(Function &F,
+                       SmallVector<BasicBlock *, 4> &ReturningBlocks) {
+  BasicBlock *DummyReturnBB =
+      BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F);
+  Type *RetTy = F.getReturnType();
+  Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
+  ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
+  ReturningBlocks.push_back(DummyReturnBB);
+  return DummyReturnBB;
+}
+
+/// Handle conditional branch instructions (-> 2 targets) and callbr
+/// instructions with N targets.
+static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI,
+                          BasicBlock *DummyReturnBB,
+                          std::vector<DominatorTree::UpdateType> &Updates) {
+  SmallVector<BasicBlock *, 2> Successors(successors(BB));
+
+  // Create a new transition block to hold the conditional branch.
+  BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
+
+  Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
+
+  // 'Successors' become successors of TransitionBB instead of BB,
+  // and TransitionBB becomes a single successor of BB.
+  Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
+  for (BasicBlock *Successor : Successors) {
+    Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
+    Updates.emplace_back(DominatorTree::Delete, BB, Successor);
+  }
+
+  // Create a branch that will always branch to the transition block and
+  // references DummyReturnBB.
+  BB->getTerminator()->eraseFromParent();
+  BranchInst::Create(TransitionBB, DummyReturnBB,
+                     ConstantInt::getTrue(F.getContext()), BB);
+  Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+}
+
 bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
                                             const PostDominatorTree &PDT,
                                             const UniformityInfo &UA) {
-  assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator.");
-
   if (PDT.root_size() == 0 ||
       (PDT.root_size() == 1 &&
-       !isa<BranchInst>(PDT.getRoot()->getTerminator())))
+       !isa<BranchInst, CallBrInst>(PDT.getRoot()->getTerminator())))
     return false;
 
   // Loop over all of the blocks in a function, tracking all of the blocks that
@@ -222,46 +260,28 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
       if (HasDivergentExitBlock)
         UnreachableBlocks.push_back(BB);
     } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
-
-      ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext());
-      if (DummyReturnBB == nullptr) {
-        DummyReturnBB = BasicBlock::Create(F.getContext(),
-                                           "DummyReturnBlock", &F);
-        Type *RetTy = F.getReturnType();
-        Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy);
-        ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB);
-        ReturningBlocks.push_back(DummyReturnBB);
-      }
+      if (!DummyReturnBB)
+        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
 
       if (BI->isUnconditional()) {
         BasicBlock *LoopHeaderBB = BI->getSuccessor(0);
         BI->eraseFromParent(); // Delete the unconditional branch.
         // Add a new conditional branch with a dummy edge to the return block.
-        BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB);
-        Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
-      } else { // Conditional branch.
-        SmallVector<BasicBlock *, 2> Successors(successors(BB));
-
-        // Create a new transition block to hold the conditional branch.
-        BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock");
-
-        Updates.reserve(Updates.size() + 2 * Successors.size() + 2);
-
-        // 'Successors' become successors of TransitionBB instead of BB,
-        // and TransitionBB becomes a single successor of BB.
-        Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB);
-        for (BasicBlock *Successor : Successors) {
-          Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor);
-          Updates.emplace_back(DominatorTree::Delete, BB, Successor);
-        }
-
-        // Create a branch that will always branch to the transition block and
-        // references DummyReturnBB.
-        BB->getTerminator()->eraseFromParent();
-        BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB);
+        BranchInst::Create(LoopHeaderBB, DummyReturnBB,
+                           ConstantInt::getTrue(F.getContext()), BB);
         Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB);
+      } else {
+        handleNBranch(F, BB, BI, DummyReturnBB, Updates);
       }
       Changed = true;
+    } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(BB->getTerminator())) {
+      if (!DummyReturnBB)
+        DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks);
+
+      handleNBranch(F, BB, CBI, DummyReturnBB, Updates);
+      Changed = true;
+    } else {
+      llvm_unreachable("unsupported block terminator");
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
index 61c5dcd..ded2f5a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
@@ -54,7 +54,7 @@ public:
   bool CullSGPRHazardsAtMemWait;
   unsigned CullSGPRHazardsMemWaitThreshold;
 
-  AMDGPUWaitSGPRHazards() {}
+  AMDGPUWaitSGPRHazards() = default;
 
   // Return the numeric ID 0-127 for a given SGPR.
   static std::optional<unsigned> sgprNumber(Register Reg,
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 975781f..f357981 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -183,7 +183,7 @@ class ScheduleMetrics {
   unsigned BubbleCycles;
 
 public:
-  ScheduleMetrics() {}
+  ScheduleMetrics() = default;
   ScheduleMetrics(unsigned L, unsigned BC)
       : ScheduleLength(L), BubbleCycles(BC) {}
   unsigned getLength() const { return ScheduleLength; }
@@ -217,7 +217,7 @@ class RegionPressureMap {
   bool IsLiveOut;
 
 public:
-  RegionPressureMap() {}
+  RegionPressureMap() = default;
   RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
       : DAG(GCNDAG), IsLiveOut(LiveOut) {}
   // Build the Instr->LiveReg and RegionIdx->Instr maps
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 5c39f7a..aa5ea77 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -2170,7 +2170,9 @@ bool SIFrameLowering::hasFPImpl(const MachineFunction &MF) const {
     return MFI.getStackSize() != 0;
   }
 
-  return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
+  return (frameTriviallyRequiresSP(MFI) &&
+          !MF.getInfo<SIMachineFunctionInfo>()->isChainFunction()) ||
+         MFI.isFrameAddressTaken() ||
          MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
              MF) ||
          mayReserveScratchForCWSR(MF) ||
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d9f76c9..45f5919 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6153,7 +6153,7 @@ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
   // information.
   if (AMDGPU::isPackedFP32Inst(MI.getOpcode()) && AMDGPU::isGFX12Plus(ST) &&
       MO.isReg() && RI.isSGPRReg(MRI, MO.getReg())) {
-    constexpr const AMDGPU::OpName OpNames[] = {
+    constexpr AMDGPU::OpName OpNames[] = {
         AMDGPU::OpName::src0, AMDGPU::OpName::src1, AMDGPU::OpName::src2};
 
     for (auto [I, OpName] : enumerate(OpNames)) {
@@ -6215,8 +6215,8 @@ bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
 bool SIInstrInfo::isLegalGFX12PlusPackedMathFP32Operand(
     const MachineRegisterInfo &MRI, const MachineInstr &MI, unsigned SrcN,
     const MachineOperand *MO) const {
-  constexpr const unsigned NumOps = 3;
-  constexpr const AMDGPU::OpName OpNames[NumOps * 2] = {
+  constexpr unsigned NumOps = 3;
+  constexpr AMDGPU::OpName OpNames[NumOps * 2] = {
       AMDGPU::OpName::src0,           AMDGPU::OpName::src1,
      AMDGPU::OpName::src2,           AMDGPU::OpName::src0_modifiers,
      AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src2_modifiers};
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 7431e11..abefa32 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -296,7 +296,7 @@ bool SIPreEmitPeephole::optimizeSetGPR(MachineInstr &First,
   for (MachineBasicBlock::instr_iterator I = std::next(First.getIterator()),
                                          E = MI.getIterator();
        I != E; ++I) {
-    if (I->isBundle())
+    if (I->isBundle() || I->isDebugInstr())
       continue;
     switch (I->getOpcode()) {
     case AMDGPU::S_SET_GPR_IDX_MODE:
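The functional core of this commit is that AMDGPUUnifyDivergentExitNodes now rewrites blocks terminated by callbr (via handleNBranch) instead of asserting on them. For context, the sketch below is a minimal LLVM IR input of that shape; it is not taken from this commit, and the kernel, value, and block names are purely illustrative. The divergent branch on the workitem ID leads to a callbr whose fallthrough and indirect destinations both return, so the pass splits the callbr block at its terminator into a "TransitionBlock" and gives the original block an always-true conditional branch with a dummy edge to the unified "DummyReturnBlock".

  ; Illustrative only: a divergent exit ending in a callbr (asm goto).
  define amdgpu_kernel void @divergent_callbr_exit(ptr addrspace(1) %out, i32 %v) {
  entry:
    %tid = call i32 @llvm.amdgcn.workitem.id.x()
    %cond = icmp eq i32 %tid, 0
    br i1 %cond, label %asm.block, label %exit

  asm.block:
    ; callbr terminator with one indirect target
    callbr void asm "", "r,!i"(i32 %v)
        to label %fallthrough [label %indirect]

  fallthrough:
    store i32 1, ptr addrspace(1) %out
    ret void

  indirect:
    store i32 2, ptr addrspace(1) %out
    ret void

  exit:
    ret void
  }

  declare i32 @llvm.amdgcn.workitem.id.x()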
